root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_close_pending
  6. tcp_dequeue_established
  7. tcp_time_wait
  8. tcp_retransmit
  9. tcp_err
  10. tcp_readable
  11. tcp_select
  12. tcp_ioctl
  13. tcp_check
  14. tcp_send_check
  15. tcp_send_skb
  16. tcp_dequeue_partial
  17. tcp_send_partial
  18. tcp_enqueue_partial
  19. tcp_send_ack
  20. tcp_build_header
  21. tcp_write
  22. tcp_sendto
  23. tcp_read_wakeup
  24. cleanup_rbuf
  25. tcp_read_urg
  26. tcp_read
  27. tcp_shutdown
  28. tcp_recvfrom
  29. tcp_reset
  30. tcp_options
  31. default_mask
  32. tcp_init_seq
  33. tcp_conn_request
  34. tcp_close
  35. tcp_write_xmit
  36. tcp_ack
  37. tcp_data
  38. tcp_check_urg
  39. tcp_urg
  40. tcp_fin
  41. tcp_accept
  42. tcp_connect
  43. tcp_sequence
  44. tcp_clean_end
  45. tcp_rcv
  46. tcp_write_wakeup
  47. tcp_send_probe0
  48. tcp_setsockopt
  49. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
  83  *              Matt Dillon     :       Yet more small nasties removed from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *              Alan Cox        :       Reset on closedown bug.
  87  *      Peter De Schrijver      :       ENOTCONN check missing in tcp_sendto().
  88  *              Michael Pall    :       Handle select() after URG properly in all cases.
  89  *              Michael Pall    :       Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin).
  90  *              Michael Pall    :       Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now.
  91  *              Michael Pall    :       recv(...,MSG_OOB) never blocks in the BSD api.
  92  *              Alan Cox        :       Changed the semantics of sk->socket to 
  93  *                                      fix a race and a signal problem with
  94  *                                      accept() and async I/O.
  95  *              Alan Cox        :       Relaxed the rules on tcp_sendto().
  96  *              Yury Shevchuk   :       Really fixed accept() blocking problem.
  97  *              Craig I. Hagan  :       Allow for BSD compatible TIME_WAIT for
  98  *                                      clients/servers which listen in on
  99  *                                      fixed ports.
 100  *              Alan Cox        :       Cleaned the above up and shrank it to
 101  *                                      a sensible code size.
 102  *              Alan Cox        :       Self connect lockup fix.
 103  *              Alan Cox        :       No connect to multicast.
 104  *              Ross Biro       :       Close unaccepted children on master
 105  *                                      socket close.
 106  *              Alan Cox        :       Reset tracing code.
 107  *              Alan Cox        :       Spurious resets on shutdown.
 108  *
 109  *
 110  * To Fix:
 111  *                      Fast path the code. Two things here - fix the window calculation
 112  *              so it doesn't iterate over the queue, also spot packets with no funny
 113  *              options arriving in order and process directly.
 114  *
 115  *              This program is free software; you can redistribute it and/or
 116  *              modify it under the terms of the GNU General Public License
 117  *              as published by the Free Software Foundation; either version
 118  *              2 of the License, or (at your option) any later version.
 119  *
 120  * Description of States:
 121  *
 122  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 123  *
 124  *      TCP_SYN_RECV            received a connection request, sent ack,
 125  *                              waiting for final ack in three-way handshake.
 126  *
 127  *      TCP_ESTABLISHED         connection established
 128  *
 129  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 130  *                              transmission of remaining buffered data
 131  *
 132  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 133  *                              to shutdown
 134  *
 135  *      TCP_CLOSING             both sides have shutdown but we still have
 136  *                              data we have to finish sending
 137  *
 138  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 139  *                              closed, can only be entered from FIN_WAIT2
 140  *                              or CLOSING.  Required because the other end
 141  *                              may not have gotten our last ACK causing it
 142  *                              to retransmit the data packet (which we ignore)
 143  *
 144  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 145  *                              us to finish writing our data and to shutdown
 146  *                              (we have to close() to move on to LAST_ACK)
 147  *
 148  *      TCP_LAST_ACK            our side has shutdown after remote has
 149  *                              shutdown.  There may still be data in our
 150  *                              buffer that we have to finish sending
 151  *              
 152  *      TCP_CLOSE               socket is finished
 153  */
 154 #include <linux/types.h>
 155 #include <linux/sched.h>
 156 #include <linux/mm.h>
 157 #include <linux/string.h>
 158 #include <linux/socket.h>
 159 #include <linux/sockios.h>
 160 #include <linux/termios.h>
 161 #include <linux/in.h>
 162 #include <linux/fcntl.h>
 163 #include <linux/inet.h>
 164 #include <linux/netdevice.h>
 165 #include "snmp.h"
 166 #include "ip.h"
 167 #include "protocol.h"
 168 #include "icmp.h"
 169 #include "tcp.h"
 170 #include <linux/skbuff.h>
 171 #include "sock.h"
 172 #include "route.h"
 173 #include <linux/errno.h>
 174 #include <linux/timer.h>
 175 #include <asm/system.h>
 176 #include <asm/segment.h>
 177 #include <linux/mm.h>
 178 
 179 #undef TCP_FASTPATH
 180 
 181 #define SEQ_TICK 3
 182 unsigned long seq_offset;
 183 struct tcp_mib  tcp_statistics;
 184 
 185 static void tcp_close(struct sock *sk, int timeout);
 186 
 187 #ifdef TCP_FASTPATH
 188 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 189 #endif
 190 
 191 
 /*
  * Return the smaller of two unsigned values.  The comparison is done
  * on the unsigned arguments; the winner is handed back as an int.
  */
 static __inline__ int min(unsigned int a, unsigned int b)
 {
         return (a < b) ? a : b;
 }
 198 
 199 #undef STATE_TRACE
 200 
 201 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 202 {
 203         if(sk->state==TCP_ESTABLISHED)
 204                 tcp_statistics.TcpCurrEstab--;
 205 #ifdef STATE_TRACE
 206         if(sk->debug)
 207                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 208 #endif  
 209         sk->state=state;
 210         if(state==TCP_ESTABLISHED)
 211                 tcp_statistics.TcpCurrEstab++;
 212 }
 213 
 214 /* This routine picks a TCP window for a socket based on
 215    the following constraints
 216    
 217    1. The window can never be shrunk once it is offered (RFC 793)
 218    2. We limit memory per socket
 219    
 220    For now we use NET2E3's heuristic of offering half the memory
 221    we have handy. All is not as bad as this seems however because
 222    of two things. Firstly we will bin packets even within the window
 223    in order to get the data we are waiting for into the memory limit.
 224    Secondly we bin common duplicate forms at receive time
 225    
 226    Better heuristics welcome
 227 */
 228    
 229 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 230 {
 231         int new_window = sk->prot->rspace(sk);
 232         
 233         if(sk->window_clamp)
 234                 new_window=min(sk->window_clamp,new_window);
 235 /*
 236  * two things are going on here.  First, we don't ever offer a
 237  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 238  * receiver side of SWS as specified in RFC1122.
 239  * Second, we always give them at least the window they
 240  * had before, in order to avoid retracting window.  This
 241  * is technically allowed, but RFC1122 advises against it and
 242  * in practice it causes trouble.
 243  */
 244         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 245                 return(sk->window);
 246         return(new_window);
 247 }
 248 
 249 /*
 250  *      Find someone to 'accept'. Must be called with
 251  *      sk->inuse=1 or cli()
 252  */ 
 253 
 254 static struct sk_buff *tcp_find_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 255 {
 256         struct sk_buff *p=skb_peek(&s->receive_queue);
 257         if(p==NULL)
 258                 return NULL;
 259         do
 260         {
 261                 if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1)
 262                         return p;
 263                 p=p->next;
 264         }
 265         while(p!=(struct sk_buff *)&s->receive_queue);
 266         return NULL;
 267 }
 268 
 269 
 270 /* 
 271  *      This routine closes sockets which have been at least partially
 272  *      opened, but not yet accepted. Currently it is only called by
 273  *      tcp_close, and timeout mirrors the value there. 
 274  */
 275 
 276 static void tcp_close_pending (struct sock *sk, int timeout) 
     /* [previous][next][first][last][top][bottom][index][help] */
 277 {
 278         struct sk_buff *skb;
 279 
 280         while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
 281                 tcp_close(skb->sk, timeout);
 282                 kfree_skb(skb, FREE_READ);
 283         }
 284         return;
 285 }
 286 
 /*
  * Find an acceptable connection on the receive queue of s (see
  * tcp_find_established) and unlink it from the queue.  The search
  * and the unlink run with interrupts disabled; the caller's
  * interrupt flags are restored before returning.  Returns the
  * unlinked buffer, or NULL if nothing suitable was queued.
  */
 static struct sk_buff *tcp_dequeue_established(struct sock *s)
 {
         unsigned long flags;
         struct sk_buff *found;

         save_flags(flags);
         cli();

         found = tcp_find_established(s);
         if (found)
                 skb_unlink(found);      /* Take it off the queue */

         restore_flags(flags);
         return found;
 }
 299 
 300 
 301 /*
 302  *      Enter the time wait state. 
 303  */
 304 
 305 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 306 {
 307         tcp_set_state(sk,TCP_TIME_WAIT);
 308         sk->shutdown = SHUTDOWN_MASK;
 309         if (!sk->dead)
 310                 sk->state_change(sk);
 311         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 312 }
 313 
 314 /*
 315  *      A timer event has triggered a tcp retransmit timeout. The
 316  *      socket xmit queue is ready and set up to send. Because
 317  *      the ack receive code keeps the queue straight we do
 318  *      nothing clever here.
 319  */
 320 
 321 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 322 {
 323         if (all) 
 324         {
 325                 ip_retransmit(sk, all);
 326                 return;
 327         }
 328 
 329         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 330         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 331         sk->cong_count = 0;
 332 
 333         sk->cong_window = 1;
 334 
 335         /* Do the actual retransmit. */
 336         ip_retransmit(sk, all);
 337 }
 338 
 339 
 340 /*
 341  * This routine is called by the ICMP module when it gets some
 342  * sort of error condition.  If err < 0 then the socket should
 343  * be closed and the error returned to the user.  If err > 0
 344  * it's just the icmp type << 8 | icmp code.  After adjustment
 345  * header points to the first 8 bytes of the tcp header.  We need
 346  * to find the appropriate port.
 347  */
 348 
 349 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 350         unsigned long saddr, struct inet_protocol *protocol)
 351 {
 352         struct tcphdr *th;
 353         struct sock *sk;
 354         struct iphdr *iph=(struct iphdr *)header;
 355   
 356         header+=4*iph->ihl;
 357    
 358 
 359         th =(struct tcphdr *)header;
 360         sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
 361 
 362         if (sk == NULL) 
 363                 return;
 364   
 365         if(err<0)
 366         {
 367                 sk->err = -err;
 368                 sk->error_report(sk);
 369                 return;
 370         }
 371 
 372         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 373         {
 374                 /*
 375                  * FIXME:
 376                  * For now we will just trigger a linear backoff.
 377                  * The slow start code should cause a real backoff here.
 378                  */
 379                 if (sk->cong_window > 4)
 380                         sk->cong_window--;
 381                 return;
 382         }
 383 
 384 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 385 
 386         /*
 387          * If we've already connected we will keep trying
 388          * until we time out, or the user gives up.
 389          */
 390 
 391         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 392         {
 393                 if (sk->state == TCP_SYN_SENT) 
 394                 {
 395                         tcp_statistics.TcpAttemptFails++;
 396                         tcp_set_state(sk,TCP_CLOSE);
 397                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 398                 }
 399                 sk->err = icmp_err_convert[err & 0xff].errno;           
 400         }
 401         return;
 402 }
 403 
 404 
 405 /*
 406  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 407  *      in the received data queue (ie a frame missing that needs sending to us)
 408  */
 409 
 410 static int tcp_readable(struct sock *sk)
 411 {
     /*
      * Count the bytes on sk's receive queue that are readable now.
      *
      * Returns 0 when sk is NULL or the queue is empty.  Otherwise the
      * queue is walked with interrupts disabled, starting at sequence
      * number sk->copied_seq+1, and the walk stops at the first hole
      * in the sequence space, at the end of the queue, or after a
      * segment with PSH set once some data has been counted.  A SYN
      * is not counted as data, and one byte is subtracted for every
      * segment carrying the URG flag.  If sk->debug is set the result
      * is logged with printk.
      *
      * NOTE(review): 'amount' is unsigned and the URG decrement below
      * runs even when nothing has been added for that segment; confirm
      * it cannot wrap when amount is 0.
      */
 412         unsigned long counted;
 413         unsigned long amount;
 414         struct sk_buff *skb;
 415         int sum;
 416         unsigned long flags;
 417 
 418         if(sk && sk->debug)
 419                 printk("tcp_readable: %p - ",sk);
 420 
 421         save_flags(flags);
 422         cli();
 423         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 424         {
 425                 restore_flags(flags);
 426                 if(sk && sk->debug) 
 427                         printk("empty\n");
 428                 return(0);
 429         }
 430   
 431         counted = sk->copied_seq+1;     /* Where we are at the moment */
 432         amount = 0;
 433   
 434         /* Do until a push or until we are out of data. */
 435         do 
 436         {
 437                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 438                         break;
 439                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 440                 if (skb->h.th->syn)
 441                         sum++;
 442                 if (sum > 0) 
 443                 {                                       /* Add it up, move on */
 444                         amount += sum;
 445                         if (skb->h.th->syn) 
 446                                 amount--;
 447                         counted += sum;
 448                 }
 449                 /*
 450                  * Don't count urg data ... but do it in the right place!
 451                  * Consider: "old_data (ptr is here) URG PUSH data"
 452                  * The old code would stop at the first push because
 453                  * it counted the urg (amount==1) and then does amount--
 454                  * *after* the loop.  This means tcp_readable() always
 455                  * returned zero if any URG PUSH was in the queue, even
 456                  * though there was normal data available. If we subtract
 457                  * the urg data right here, we even get it to work for more
 458                  * than one URG PUSH skb without normal data.
 459                  * This means that select() finally works now with urg data
 460                  * in the queue.  Note that rlogin was never affected
 461                  * because it doesn't use select(); it uses two processes
 462                  * and a blocking read().  And the queue scan in tcp_read()
 463                  * was correct.  Mike <pall@rz.uni-karlsruhe.de>
 464                  */
 465                 if (skb->h.th->urg)
 466                         amount--;       /* don't count urg data */
 467                 if (amount && skb->h.th->psh) break;
 468                 skb = skb->next;
 469         }
 470         while(skb != (struct sk_buff *)&sk->receive_queue);
 471 
 472         restore_flags(flags);
 473         if(sk->debug)
 474                 printk("got %lu bytes.\n",amount);
 475         return(amount);
 476 }
 477 
 478 
 479 /*
 480  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 481  *      listening socket has a receive queue of sockets to accept.
 482  */
 483 
 484 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 485 {
 486         sk->inuse = 1;
 487 
 488         switch(sel_type) 
 489         {
 490                 case SEL_IN:
 491                         select_wait(sk->sleep, wait);
 492                         if (skb_peek(&sk->receive_queue) != NULL) 
 493                         {
 494                                 if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) 
 495                                 {
 496                                         release_sock(sk);
 497                                         return(1);
 498                                 }
 499                         }
 500                         if (sk->err != 0)       /* Receiver error */
 501                         {
 502                                 release_sock(sk);
 503                                 return(1);
 504                         }
 505                         if (sk->shutdown & RCV_SHUTDOWN) 
 506                         {
 507                                 release_sock(sk);
 508                                 return(1);
 509                         } 
 510                         release_sock(sk);
 511                         return(0);
 512                 case SEL_OUT:
 513                         select_wait(sk->sleep, wait);
 514                         if (sk->shutdown & SEND_SHUTDOWN) 
 515                         {
 516                                 /* FIXME: should this return an error? */
 517                                 release_sock(sk);
 518                                 return(0);
 519                         }
 520 
 521                         /*
 522                          * This is now right thanks to a small fix
 523                          * by Matt Dillon.
 524                          */
 525                         
 526                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 527                         {
 528                                 release_sock(sk);
 529                                 /* This should cause connect to work ok. */
 530                                 if (sk->state == TCP_SYN_RECV ||
 531                                     sk->state == TCP_SYN_SENT) return(0);
 532                                 return(1);
 533                         }
 534                         release_sock(sk);
 535                         return(0);
 536                 case SEL_EX:
 537                         select_wait(sk->sleep,wait);
 538                         if (sk->err || sk->urg_data) 
 539                         {
 540                                 release_sock(sk);
 541                                 return(1);
 542                         }
 543                         release_sock(sk);
 544                         return(0);
 545         }
 546 
 547         release_sock(sk);
 548         return(0);
 549 }
 550 
 551 
 552 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 553 {
 554         int err;
 555         switch(cmd) 
 556         {
 557 
 558                 case TIOCINQ:
 559 #ifdef FIXME    /* FIXME: */
 560                 case FIONREAD:
 561 #endif
 562                 {
 563                         unsigned long amount;
 564 
 565                         if (sk->state == TCP_LISTEN) 
 566                                 return(-EINVAL);
 567 
 568                         sk->inuse = 1;
 569                         amount = tcp_readable(sk);
 570                         release_sock(sk);
 571                         err=verify_area(VERIFY_WRITE,(void *)arg,
 572                                                    sizeof(unsigned long));
 573                         if(err)
 574                                 return err;
 575                         put_fs_long(amount,(unsigned long *)arg);
 576                         return(0);
 577                 }
 578                 case SIOCATMARK:
 579                 {
 580                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 581 
 582                         err = verify_area(VERIFY_WRITE,(void *) arg,
 583                                                   sizeof(unsigned long));
 584                         if (err)
 585                                 return err;
 586                         put_fs_long(answ,(int *) arg);
 587                         return(0);
 588                 }
 589                 case TIOCOUTQ:
 590                 {
 591                         unsigned long amount;
 592 
 593                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 594                         amount = sk->prot->wspace(sk);
 595                         err=verify_area(VERIFY_WRITE,(void *)arg,
 596                                                    sizeof(unsigned long));
 597                         if(err)
 598                                 return err;
 599                         put_fs_long(amount,(unsigned long *)arg);
 600                         return(0);
 601                 }
 602                 default:
 603                         return(-EINVAL);
 604         }
 605 }
 606 
 607 
 608 /*
 609  *      This routine computes a TCP checksum. 
 610  */
 611  
 612 unsigned short tcp_check(struct tcphdr *th, int len,
 613           unsigned long saddr, unsigned long daddr)
 614 {     
     /*
      * Compute the TCP checksum over 'len' bytes starting at th,
      * together with the pseudo-header terms added in the first asm
      * block (daddr, saddr, the length and IPPROTO_TCP).
      *
      * th:    start of the TCP header; the data to sum follows it
      * len:   number of bytes to sum, starting at th
      * saddr: source address; 0 means "use ip_my_addr()"
      * daddr: destination address
      *
      * Returns the complemented sum masked to its low 16 bits.
      * The summing is done in x86 inline assembly.
      */
 615         unsigned long sum;
 616    
 617         if (saddr == 0) saddr = ip_my_addr();
 618 
 619 /*
 620  * stupid, gcc complains when I use just one __asm__ block,
 621  * something about too many reloads, but this is just two
 622  * instructions longer than what I want
 623  */
     /*
      * First block: sum = daddr + saddr + pseudo-header word, with the
      * final carry folded back in.
      */
 624         __asm__("
 625             addl %%ecx, %%ebx
 626             adcl %%edx, %%ebx
 627             adcl $0, %%ebx
 628             "
 629         : "=b"(sum)
 630         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 631         : "bx", "cx", "dx" );
     /*
      * Second block: add the segment itself.  Label 1 sums 32 bytes
      * (eight longwords) per pass, label 3 sums the remaining whole
      * longwords, then a trailing 16-bit word (len & 2) and a trailing
      * byte (len & 1) are added.  At label 6 the upper 16 bits of the
      * sum are folded into the lower 16.
      */
 632         __asm__("
 633             movl %%ecx, %%edx
 634             cld
 635             cmpl $32, %%ecx
 636             jb 2f
 637             shrl $5, %%ecx
 638             clc
 639 1:          lodsl
 640             adcl %%eax, %%ebx
 641             lodsl
 642             adcl %%eax, %%ebx
 643             lodsl
 644             adcl %%eax, %%ebx
 645             lodsl
 646             adcl %%eax, %%ebx
 647             lodsl
 648             adcl %%eax, %%ebx
 649             lodsl
 650             adcl %%eax, %%ebx
 651             lodsl
 652             adcl %%eax, %%ebx
 653             lodsl
 654             adcl %%eax, %%ebx
 655             loop 1b
 656             adcl $0, %%ebx
 657             movl %%edx, %%ecx
 658 2:          andl $28, %%ecx
 659             je 4f
 660             shrl $2, %%ecx
 661             clc
 662 3:          lodsl
 663             adcl %%eax, %%ebx
 664             loop 3b
 665             adcl $0, %%ebx
 666 4:          movl $0, %%eax
 667             testw $2, %%dx
 668             je 5f
 669             lodsw
 670             addl %%eax, %%ebx
 671             adcl $0, %%ebx
 672             movw $0, %%ax
 673 5:          test $1, %%edx
 674             je 6f
 675             lodsb
 676             addl %%eax, %%ebx
 677             adcl $0, %%ebx
 678 6:          movl %%ebx, %%eax
 679             shrl $16, %%eax
 680             addw %%ax, %%bx
 681             adcw $0, %%bx
 682             "
 683         : "=b"(sum)
 684         : "0"(sum), "c"(len), "S"(th)
 685         : "ax", "bx", "cx", "dx", "si" );
 686 
 687         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 688   
 689         return((~sum) & 0xffff);
 690 }
 691 
 692 
 693 
 694 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 695                 unsigned long daddr, int len, struct sock *sk)
 696 {
 697         th->check = 0;
 698         th->check = tcp_check(th, len, saddr, daddr);
 699         return;
 700 }
 701 
 702 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 703 {
 704         int size;
 705         struct tcphdr * th = skb->h.th;
 706 
 707         /* length of packet (not counting length of pre-tcp headers) */
 708         size = skb->len - ((unsigned char *) th - skb->data);
 709 
 710         /* sanity check it.. */
 711         if (size < sizeof(struct tcphdr) || size > skb->len) 
 712         {
 713                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 714                         skb, skb->data, th, skb->len);
 715                 kfree_skb(skb, FREE_WRITE);
 716                 return;
 717         }
 718 
 719         /* If we have queued a header size packet.. */
 720         if (size == sizeof(struct tcphdr)) 
 721         {
 722                 /* If its got a syn or fin its notionally included in the size..*/
 723                 if(!th->syn && !th->fin) 
 724                 {
 725                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 726                         kfree_skb(skb,FREE_WRITE);
 727                         return;
 728                 }
 729         }
 730 
 731         tcp_statistics.TcpOutSegs++;  
 732 
 733         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 734         if (after(skb->h.seq, sk->window_seq) ||
 735             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 736              sk->packets_out >= sk->cong_window) 
 737         {
 738                 /* checksum will be supplied by tcp_write_xmit.  So
 739                  * we shouldn't need to set it at all.  I'm being paranoid */
 740                 th->check = 0;
 741                 if (skb->next != NULL) 
 742                 {
 743                         printk("tcp_send_partial: next != NULL\n");
 744                         skb_unlink(skb);
 745                 }
 746                 skb_queue_tail(&sk->write_queue, skb);
 747                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 748                     sk->send_head == NULL &&
 749                     sk->ack_backlog == 0)
 750                         reset_timer(sk, TIME_PROBE0, sk->rto);
 751         } 
 752         else 
 753         {
 754                 th->ack_seq = ntohl(sk->acked_seq);
 755                 th->window = ntohs(tcp_select_window(sk));
 756 
 757                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 758 
 759                 sk->sent_seq = sk->write_seq;
 760                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 761         }
 762 }
 763 
 764 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 765 {
 766         struct sk_buff * skb;
 767         unsigned long flags;
 768 
 769         save_flags(flags);
 770         cli();
 771         skb = sk->partial;
 772         if (skb) {
 773                 sk->partial = NULL;
 774                 del_timer(&sk->partial_timer);
 775         }
 776         restore_flags(flags);
 777         return skb;
 778 }
 779 
 780 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 781 {
 782         struct sk_buff *skb;
 783 
 784         if (sk == NULL)
 785                 return;
 786         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 787                 tcp_send_skb(sk, skb);
 788 }
 789 
 790 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 791 {
 792         struct sk_buff * tmp;
 793         unsigned long flags;
 794 
 795         save_flags(flags);
 796         cli();
 797         tmp = sk->partial;
 798         if (tmp)
 799                 del_timer(&sk->partial_timer);
 800         sk->partial = skb;
 801         init_timer(&sk->partial_timer);
 802         sk->partial_timer.expires = HZ;
 803         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 804         sk->partial_timer.data = (unsigned long) sk;
 805         add_timer(&sk->partial_timer);
 806         restore_flags(flags);
 807         if (tmp)
 808                 tcp_send_skb(sk, tmp);
 809 }
 810 
 811 
 812 /*
 813  *      This routine sends an ack and also updates the window. 
 814  */
 815  
 816 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 817              struct sock *sk,
 818              struct tcphdr *th, unsigned long daddr)
 819 {
 820         struct sk_buff *buff;
 821         struct tcphdr *t1;
 822         struct device *dev = NULL;
 823         int tmp;
 824 
 825         if(sk->zapped)
 826                 return;         /* We have been reset, we may not send again */
 827         /*
 828          * We need to grab some memory, and put together an ack,
 829          * and then put it into the queue to be sent.
 830          */
 831 
 832         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 833         if (buff == NULL) 
 834         {
 835                 /* Force it to send an ack. */
 836                 sk->ack_backlog++;
 837                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 838                 {
 839                         reset_timer(sk, TIME_WRITE, 10);
 840                 }
 841                 return;
 842         }
 843 
 844         buff->len = sizeof(struct tcphdr);
 845         buff->sk = sk;
 846         buff->localroute = sk->localroute;
 847         t1 =(struct tcphdr *) buff->data;
 848 
 849         /* Put in the IP header and routing stuff. */
 850         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 851                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 852         if (tmp < 0) 
 853         {
 854                 buff->free=1;
 855                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 856                 return;
 857         }
 858         buff->len += tmp;
 859         t1 =(struct tcphdr *)((char *)t1 +tmp);
 860 
 861         /* FIXME: */
 862         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 863 
 864         /*
 865          *      Swap the send and the receive. 
 866          */
 867          
 868         t1->dest = th->source;
 869         t1->source = th->dest;
 870         t1->seq = ntohl(sequence);
 871         t1->ack = 1;
 872         sk->window = tcp_select_window(sk);
 873         t1->window = ntohs(sk->window);
 874         t1->res1 = 0;
 875         t1->res2 = 0;
 876         t1->rst = 0;
 877         t1->urg = 0;
 878         t1->syn = 0;
 879         t1->psh = 0;
 880         t1->fin = 0;
 881         if (ack == sk->acked_seq) 
 882         {
 883                 sk->ack_backlog = 0;
 884                 sk->bytes_rcv = 0;
 885                 sk->ack_timed = 0;
 886                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 887                                   && sk->timeout == TIME_WRITE) 
 888                 {
 889                         if(sk->keepopen) {
 890                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 891                         } else {
 892                                 delete_timer(sk);
 893                         }
 894                 }
 895         }
 896         t1->ack_seq = ntohl(ack);
 897         t1->doff = sizeof(*t1)/4;
 898         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 899         if (sk->debug)
 900                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 901         tcp_statistics.TcpOutSegs++;
 902         sk->prot->queue_xmit(sk, dev, buff, 1);
 903 }
 904 
 905 
 906 /* 
 907  *      This routine builds a generic TCP header. 
 908  */
 909  
 910 int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 911 {
 912 
 913         /* FIXME: want to get rid of this. */
 914         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 915         th->seq = htonl(sk->write_seq);
 916         th->psh =(push == 0) ? 1 : 0;
 917         th->doff = sizeof(*th)/4;
 918         th->ack = 1;
 919         th->fin = 0;
 920         sk->ack_backlog = 0;
 921         sk->bytes_rcv = 0;
 922         sk->ack_timed = 0;
 923         th->ack_seq = htonl(sk->acked_seq);
 924         sk->window = tcp_select_window(sk);
 925         th->window = htons(sk->window);
 926 
 927         return(sizeof(*th));
 928 }
 929 
 930 /*
 931  *      This routine copies from a user buffer into a socket,
 932  *      and starts the transmit system.
      *
      *      sk        - socket to write on
      *      from      - user-space source buffer (read with memcpy_fromfs)
      *      len       - number of bytes the caller wants to send
      *      nonblock  - when set, return -EAGAIN instead of sleeping
      *      flags     - MSG_OOB marks the data urgent; MSG_DONTROUTE is
      *                  OR-ed into the buffer's localroute setting
      *
      *      Returns the number of bytes accepted.  When nothing was accepted
      *      it returns a negative error instead: the pending sk->err, -EPIPE,
      *      -EAGAIN, -ERESTARTSYS, or the error from the header builder.
      *
      *      sk->inuse is set on entry and after every sleep; each exit path
      *      has called release_sock() before returning.
 933  */
 934 
 935 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 936           int len, int nonblock, unsigned flags)
 937 {
 938         int copied = 0;
 939         int copy;
 940         int tmp;
 941         struct sk_buff *skb;
 942         struct sk_buff *send_tmp;
 943         unsigned char *buff;
 944         struct proto *prot;
 945         struct device *dev = NULL;
 946 
 947         sk->inuse=1;
 948         prot = sk->prot;
             /* One pass of this loop builds or extends at most one segment. */
 949         while(len > 0) 
 950         {
 951                 if (sk->err) 
 952                 {                       /* Stop on an error */
 953                         release_sock(sk);
                             /* Report progress first; the error stays pending. */
 954                         if (copied) 
 955                                 return(copied);
 956                         tmp = -sk->err;
 957                         sk->err = 0;
 958                         return(tmp);
 959                 }
 960 
 961         /*
 962          *      First thing we do is make sure that we are established. 
 963          */
 964         
 965                 if (sk->shutdown & SEND_SHUTDOWN) 
 966                 {
 967                         release_sock(sk);
                             /*
                              * EPIPE is left in sk->err only when a partial
                              * count is returned; otherwise it is cleared and
                              * returned directly.
                              */
 968                         sk->err = EPIPE;
 969                         if (copied) 
 970                                 return(copied);
 971                         sk->err = 0;
 972                         return(-EPIPE);
 973                 }
 974 
 975 
 976         /* 
 977          *      Wait for a connection to finish.
 978          */
 979         
 980                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 981                 {
 982                         if (sk->err) 
 983                         {
 984                                 release_sock(sk);
 985                                 if (copied) 
 986                                         return(copied);
 987                                 tmp = -sk->err;
 988                                 sk->err = 0;
 989                                 return(tmp);
 990                         }
 991 
                             /*
                              * Only the two SYN states are worth waiting in;
                              * any other state means the write cannot succeed.
                              */
 992                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 993                         {
 994                                 release_sock(sk);
 995                                 if (copied) 
 996                                         return(copied);
 997 
 998                                 if (sk->err) 
 999                                 {
1000                                         tmp = -sk->err;
1001                                         sk->err = 0;
1002                                         return(tmp);
1003                                 }
1004 
                                     /* SIGPIPE is raised only when keepopen is set. */
1005                                 if (sk->keepopen) 
1006                                 {
1007                                         send_sig(SIGPIPE, current, 0);
1008                                 }
1009                                 return(-EPIPE);
1010                         }
1011 
1012                         if (nonblock || copied) 
1013                         {
1014                                 release_sock(sk);
1015                                 if (copied) 
1016                                         return(copied);
1017                                 return(-EAGAIN);
1018                         }
1019 
                             /*
                              * Release the socket, then re-test the state with
                              * interrupts off before sleeping.
                              */
1020                         release_sock(sk);
1021                         cli();
1022                 
1023                         if (sk->state != TCP_ESTABLISHED &&
1024                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
1025                         {
1026                                 interruptible_sleep_on(sk->sleep);
1027                                 if (current->signal & ~current->blocked) 
1028                                 {
                                             /* Socket is already released here. */
1029                                         sti();
1030                                         if (copied) 
1031                                                 return(copied);
1032                                         return(-ERESTARTSYS);
1033                                 }
1034                         }
1035                         sk->inuse = 1;
1036                         sti();
1037                 }
1038 
1039         /*
1040          * The following code can result in copy <= 0 if sk->mss is ever
1041          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1042          * sk->mtu is constant once SYN processing is finished.  I.e. we
1043          * had better not get here until we've seen his SYN and at least one
1044          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1045          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1046          * non-decreasing.  Note that any ioctl to set user_mss must be done
1047          * before the exchange of SYN's.  If the initial ack from the other
1048          * end has a window of 0, max_window and thus mss will both be 0.
1049          */
1050 
1051         /* 
1052          *      Now we need to check if we have a half built packet. 
1053          */
1054 
1055                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1056                 {
1057                         int hdrlen;
1058 
1059                          /* IP header + TCP header */
1060                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1061                                  + sizeof(struct tcphdr);
1062         
1063                         /* Add more stuff to the end of skb->len */
1064                         if (!(flags & MSG_OOB)) 
1065                         {
                                     /* Room left up to one mss of payload, capped by len. */
1066                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1067                                 /* FIXME: this is really a bug. */
1068                                 if (copy <= 0) 
1069                                 {
1070                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1071                                         copy = 0;
1072                                 }
1073           
1074                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1075                                 skb->len += copy;
1076                                 from += copy;
1077                                 copied += copy;
1078                                 len -= copy;
1079                                 sk->write_seq += copy;
1080                         }
                             /*
                              * Send now if the segment is full, if urgent data
                              * is being written, or if sk->packets_out is zero;
                              * otherwise hold it as the partial segment again.
                              */
1081                         if ((skb->len - hdrlen) >= sk->mss ||
1082                                 (flags & MSG_OOB) || !sk->packets_out)
1083                                 tcp_send_skb(sk, skb);
1084                         else
1085                                 tcp_enqueue_partial(skb, sk);
1086                         continue;
1087                 }
1088 
1089         /*
1090          * We also need to worry about the window.
1091          * If window < 1/2 the maximum window we've seen from this
1092          *   host, don't use it.  This is sender side
1093          *   silly window prevention, as specified in RFC1122.
1094          *   (Note that this is different than earlier versions of
1095          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1096          *   use the whole MSS.  Since this results in the right
1097          *   edge of the packet being outside the window, it will
1098          *   be queued for later rather than sent.
1099          */
1100 
1101                 copy = sk->window_seq - sk->write_seq;
1102                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1103                         copy = sk->mss;
1104                 if (copy > len)
1105                         copy = len;
1106 
1107         /*
1108          *      We should really check the window here also. 
1109          */
1110          
                     /*
                      * send_tmp stays NULL for a full-sized or urgent segment.
                      * It is set when a short segment is allocated with room
                      * to grow, making it a candidate for the partial slot.
                      */
1111                 send_tmp = NULL;
1112                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1113                 {
1114                         /*
1115                          *      We will release the socket in case we sleep here. 
1116                          */
1117                         release_sock(sk);
1118                         /*
1119                          *      NB: following must be mtu, because mss can be increased.
1120                          *      mss is always <= mtu 
1121                          */
1122                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1123                         sk->inuse = 1;
1124                         send_tmp = skb;
1125                 } 
1126                 else 
1127                 {
1128                         /*
1129                          *      We will release the socket in case we sleep here. 
1130                          */
1131                         release_sock(sk);
1132                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1133                         sk->inuse = 1;
1134                 }
1135 
1136                 /*
1137                  *      If we didn't get any memory, we need to sleep. 
1138                  */
1139 
1140                 if (skb == NULL) 
1141                 {
1142                         if (nonblock) 
1143                         {
1144                                 release_sock(sk);
1145                                 if (copied) 
1146                                         return(copied);
1147                                 return(-EAGAIN);
1148                         }
1149 
1150                         /*
1151                          *      FIXME: here is another race condition. 
1152                          */
1153 
                             /*
                              * Snapshot wmem_alloc before releasing; the sleep
                              * below is skipped if it has dropped since.
                              */
1154                         tmp = sk->wmem_alloc;
1155                         release_sock(sk);
1156                         cli();
1157                         /*
1158                          *      Again we will try to avoid it. 
1159                          */
1160                         if (tmp <= sk->wmem_alloc &&
1161                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1162                                 && sk->err == 0) 
1163                         {
1164                                 interruptible_sleep_on(sk->sleep);
1165                                 if (current->signal & ~current->blocked) 
1166                                 {
                                             /* Socket is already released here. */
1167                                         sti();
1168                                         if (copied) 
1169                                                 return(copied);
1170                                         return(-ERESTARTSYS);
1171                                 }
1172                         }
1173                         sk->inuse = 1;
1174                         sti();
                             /* Start the pass over: state and errors are re-checked. */
1175                         continue;
1176                 }
1177 
1178                 skb->len = 0;
1179                 skb->sk = sk;
1180                 skb->free = 0;
1181                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1182         
1183                 buff = skb->data;
1184         
1185                 /*
1186                  * FIXME: we need to optimize this.
1187                  * Perhaps some hints here would be good.
1188                  */
1189                 
1190                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1191                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1192                 if (tmp < 0 ) 
1193                 {
1194                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1195                         release_sock(sk);
1196                         if (copied) 
1197                                 return(copied);
1198                         return(tmp);
1199                 }
                     /* tmp is the size of the lower-level headers just written. */
1200                 skb->len += tmp;
1201                 skb->dev = dev;
1202                 buff += tmp;
1203                 skb->h.th =(struct tcphdr *) buff;
                     /*
                      * The third argument is the data left after this segment;
                      * tcp_build_header() sets PSH when it is zero.
                      */
1204                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
                     /*
                      * NOTE(review): tcp_build_header() as defined in this file
                      * always returns sizeof(struct tcphdr), so this branch
                      * looks purely defensive.
                      */
1205                 if (tmp < 0) 
1206                 {
1207                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1208                         release_sock(sk);
1209                         if (copied) 
1210                                 return(copied);
1211                         return(tmp);
1212                 }
1213 
1214                 if (flags & MSG_OOB) 
1215                 {
1216                         ((struct tcphdr *)buff)->urg = 1;
                             /*
                              * NOTE(review): this is a host-to-network store;
                              * htons() would be the conventional spelling.
                              */
1217                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1218                 }
1219                 skb->len += tmp;
1220                 memcpy_fromfs(buff+tmp, from, copy);
1221 
1222                 from += copy;
1223                 copied += copy;
1224                 len -= copy;
1225                 skb->len += copy;
1226                 skb->free = 0;
1227                 sk->write_seq += copy;
1228         
                     /*
                      * A short segment is held back as the partial segment
                      * while sk->packets_out is non-zero; otherwise it is
                      * sent straight away.
                      */
1229                 if (send_tmp != NULL && sk->packets_out) 
1230                 {
1231                         tcp_enqueue_partial(send_tmp, sk);
1232                         continue;
1233                 }
1234                 tcp_send_skb(sk, skb);
1235         }
1236         sk->err = 0;
1237 
1238 /*
1239  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1240  *      interactive fast network servers. It's meant to be on and
1241  *      it really improves the throughput though not the echo time
1242  *      on my slow slip link - Alan
1243  */
1244 
1245 /*
1246  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1247  */
1248  
1249         if(sk->partial && ((!sk->packets_out) 
1250      /* If not nagling we can send on the before case too.. */
1251               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1252         ))
1253                 tcp_send_partial(sk);
1254 
1255         release_sock(sk);
1256         return(copied);
1257 }
1258 
1259 
1260 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1261            int len, int nonblock, unsigned flags,
1262            struct sockaddr_in *addr, int addr_len)
1263 {
1264         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1265                 return -EINVAL;
1266         if (sk->state == TCP_CLOSE)
1267                 return -ENOTCONN;
1268         if (addr_len < sizeof(*addr))
1269                 return -EINVAL;
1270         if (addr->sin_family && addr->sin_family != AF_INET) 
1271                 return -EINVAL;
1272         if (addr->sin_port != sk->dummy_th.dest) 
1273                 return -EISCONN;
1274         if (addr->sin_addr.s_addr != sk->daddr) 
1275                 return -EISCONN;
1276         return tcp_write(sk, from, len, nonblock, flags);
1277 }
1278 
1279 
1280 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1281 {
1282         int tmp;
1283         struct device *dev = NULL;
1284         struct tcphdr *t1;
1285         struct sk_buff *buff;
1286 
1287         if (!sk->ack_backlog) 
1288                 return;
1289 
1290         /*
1291          * FIXME: we need to put code here to prevent this routine from
1292          * being called.  Being called once in a while is ok, so only check
1293          * if this is the second time in a row.
1294          */
1295 
1296         /*
1297          * We need to grab some memory, and put together an ack,
1298          * and then put it into the queue to be sent.
1299          */
1300 
1301         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1302         if (buff == NULL) 
1303         {
1304                 /* Try again real soon. */
1305                 reset_timer(sk, TIME_WRITE, 10);
1306                 return;
1307         }
1308 
1309         buff->len = sizeof(struct tcphdr);
1310         buff->sk = sk;
1311         buff->localroute = sk->localroute;
1312         
1313         /*
1314          *      Put in the IP header and routing stuff. 
1315          */
1316 
1317         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1318                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1319         if (tmp < 0) 
1320         {
1321                 buff->free=1;
1322                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1323                 return;
1324         }
1325 
1326         buff->len += tmp;
1327         t1 =(struct tcphdr *)(buff->data +tmp);
1328 
1329         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1330         t1->seq = htonl(sk->sent_seq);
1331         t1->ack = 1;
1332         t1->res1 = 0;
1333         t1->res2 = 0;
1334         t1->rst = 0;
1335         t1->urg = 0;
1336         t1->syn = 0;
1337         t1->psh = 0;
1338         sk->ack_backlog = 0;
1339         sk->bytes_rcv = 0;
1340         sk->window = tcp_select_window(sk);
1341         t1->window = ntohs(sk->window);
1342         t1->ack_seq = ntohl(sk->acked_seq);
1343         t1->doff = sizeof(*t1)/4;
1344         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1345         sk->prot->queue_xmit(sk, dev, buff, 1);
1346         tcp_statistics.TcpOutSegs++;
1347 }
1348 
1349 
1350 /*
1351  *      FIXME:
1352  *      This routine frees used buffers.
1353  *      It should consider sending an ACK to let the
1354  *      other end know we now have a bigger window.
1355  */
1356 
1357 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1358 {
1359         unsigned long flags;
1360         unsigned long left;
1361         struct sk_buff *skb;
1362         unsigned long rspace;
1363 
1364         if(sk->debug)
1365                 printk("cleaning rbuf for sk=%p\n", sk);
1366   
1367         save_flags(flags);
1368         cli();
1369   
1370         left = sk->prot->rspace(sk);
1371  
1372         /*
1373          * We have to loop through all the buffer headers,
1374          * and try to free up all the space we can.
1375          */
1376 
1377         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1378         {
1379                 if (!skb->used) 
1380                         break;
1381                 skb_unlink(skb);
1382                 skb->sk = sk;
1383                 kfree_skb(skb, FREE_READ);
1384         }
1385 
1386         restore_flags(flags);
1387 
1388         /*
1389          * FIXME:
1390          * At this point we should send an ack if the difference
1391          * in the window, and the amount of space is bigger than
1392          * TCP_WINDOW_DIFF.
1393          */
1394 
1395         if(sk->debug)
1396                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1397                                             left);
1398         if ((rspace=sk->prot->rspace(sk)) != left) 
1399         {
1400                 /*
1401                  * This area has caused the most trouble.  The current strategy
1402                  * is to simply do nothing if the other end has room to send at
1403                  * least 3 full packets, because the ack from those will auto-
1404                  * matically update the window.  If the other end doesn't think
1405                  * we have much space left, but we have room for at least 1 more
1406                  * complete packet than it thinks we do, we will send an ack
1407                  * immediately.  Otherwise we will wait up to .5 seconds in case
1408                  * the user reads some more.
1409                  */
1410                 sk->ack_backlog++;
1411         /*
1412          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1413          * if the other end is offering a window smaller than the agreed on MSS
1414          * (called sk->mtu here).  In theory there's no connection between send
1415          * and receive, and so no reason to think that they're going to send
1416          * small packets.  For the moment I'm using the hack of reducing the mss
1417          * only on the send side, so I'm putting mtu here.
1418          */
1419 
1420                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1421                 {
1422                         /* Send an ack right now. */
1423                         tcp_read_wakeup(sk);
1424                 } 
1425                 else 
1426                 {
1427                         /* Force it to send an ack soon. */
1428                         int was_active = del_timer(&sk->timer);
1429                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1430                         {
1431                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1432                         } 
1433                         else
1434                                 add_timer(&sk->timer);
1435                 }
1436         }
1437 } 
1438 
1439 
1440 /*
1441  *      Handle reading urgent data. 
1442  */
1443  
1444 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1445              unsigned char *to, int len, unsigned flags)
1446 {
1447         if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1448                 return -EINVAL;
1449         if (sk->err) 
1450         {
1451                 int tmp = -sk->err;
1452                 sk->err = 0;
1453                 return tmp;
1454         }
1455 
1456         if (sk->state == TCP_CLOSE || sk->done) 
1457         {
1458                 if (!sk->done) {
1459                         sk->done = 1;
1460                         return 0;
1461                 }
1462                 return -ENOTCONN;
1463         }
1464 
1465         if (sk->shutdown & RCV_SHUTDOWN) 
1466         {
1467                 sk->done = 1;
1468                 return 0;
1469         }
1470         sk->inuse = 1;
1471         if (sk->urg_data & URG_VALID) 
1472         {
1473                 char c = sk->urg_data;
1474                 if (!(flags & MSG_PEEK))
1475                         sk->urg_data = URG_READ;
1476                 put_fs_byte(c, to);
1477                 release_sock(sk);
1478                 return 1;
1479         }
1480         release_sock(sk);
1481         
1482         /*
1483          * Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
1484          * the available implementations agree in this case:
1485          * this call should never block, independent of the
1486          * blocking state of the socket.
1487          * Mike <pall@rz.uni-karlsruhe.de>
1488          */
1489         return -EAGAIN;
1490 }
1491 
1492 
1493 /*
1494  *      This routine copies from a sock struct into the user buffer. 
1495  */
1496  
/*
 *      tcp_read - copy queued receive data into a user buffer.
 *
 *      sk       - socket being read
 *      to       - user-space destination (filled with memcpy_tofs)
 *      len      - maximum number of bytes to copy
 *      nonblock - non-zero: return -EAGAIN instead of sleeping when no
 *                 data is available
 *      flags    - MSG_OOB hands the request to tcp_read_urg(); MSG_PEEK
 *                 copies without advancing sk->copied_seq and without
 *                 marking buffers as used
 *
 *      Returns the number of bytes copied (0 on end of stream), or a
 *      negative value: -ENOTCONN, -sk->err, -EAGAIN or -ERESTARTSYS.
 *      A read stops early at the urgent mark once some data was copied.
 */
1497 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1498         int len, int nonblock, unsigned flags)
1499 {
1500         struct wait_queue wait = { current, NULL };
1501         int copied = 0;
1502         unsigned long peek_seq;
1503         unsigned long *seq;
1504         unsigned long used;
1505 
1506         /* This error should be checked. */
1507         if (sk->state == TCP_LISTEN)
1508                 return -ENOTCONN;
1509 
1510         /* Urgent data needs to be handled specially. */
1511         if (flags & MSG_OOB)
1512                 return tcp_read_urg(sk, nonblock, to, len, flags);
1513 
             /*
              * 'seq' is the read position.  Normally it aliases
              * sk->copied_seq; when peeking it points at a private copy so
              * the socket's position is left untouched.
              */
1514         peek_seq = sk->copied_seq;
1515         seq = &sk->copied_seq;
1516         if (flags & MSG_PEEK)
1517                 seq = &peek_seq;
1518 
1519         add_wait_queue(sk->sleep, &wait);
1520         sk->inuse = 1;
1521         while (len > 0) 
1522         {
1523                 struct sk_buff * skb;
1524                 unsigned long offset;
1525         
1526                 /*
1527                  * are we at urgent data? Stop if we have read anything.
1528                  */
1529                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1530                         break;
1531 
                     /*
                      * State is set before the queue is scanned -- presumably
                      * so that a wakeup arriving during the scan is not lost
                      * before schedule() below (NOTE(review): confirm).
                      */
1532                 current->state = TASK_INTERRUPTIBLE;
1533 
                     /*
                      * Walk the receive queue looking for the buffer that
                      * contains the next wanted byte, 1+*seq.
                      */
1534                 skb = skb_peek(&sk->receive_queue);
1535                 do 
1536                 {
1537                         if (!skb)
1538                                 break;
                             /* Next wanted byte precedes this buffer: stop scanning. */
1539                         if (before(1+*seq, skb->h.th->seq))
1540                                 break;
1541                         offset = 1 + *seq - skb->h.th->seq;
                             /* One sequence number is discounted when the segment has SYN set. */
1542                         if (skb->h.th->syn)
1543                                 offset--;
1544                         if (offset < skb->len)
1545                                 goto found_ok_skb;
                             /* Everything in this buffer lies behind the read position. */
1546                         if (!(flags & MSG_PEEK))
1547                                 skb->used = 1;
1548                         skb = skb->next;
1549                 }
1550                 while (skb != (struct sk_buff *)&sk->receive_queue);
1551 
                     /* Nothing more is readable now: return what we already have. */
1552                 if (copied)
1553                         break;
1554 
                     /* Report a pending socket error once and clear it. */
1555                 if (sk->err) 
1556                 {
1557                         copied = -sk->err;
1558                         sk->err = 0;
1559                         break;
1560                 }
1561 
                     /*
                      * Closed socket: the first read returns 0 (copied is
                      * still 0 here), later reads return -ENOTCONN.
                      */
1562                 if (sk->state == TCP_CLOSE) 
1563                 {
1564                         if (!sk->done) 
1565                         {
1566                                 sk->done = 1;
1567                                 break;
1568                         }
1569                         copied = -ENOTCONN;
1570                         break;
1571                 }
1572 
1573                 if (sk->shutdown & RCV_SHUTDOWN) 
1574                 {
1575                         sk->done = 1;
1576                         break;
1577                 }
1578                         
1579                 if (nonblock) 
1580                 {
1581                         copied = -EAGAIN;
1582                         break;
1583                 }
1584 
                     /*
                      * Sleep until woken: tidy the receive buffers, drop the
                      * socket, yield the CPU, then take the socket again.
                      */
1585                 cleanup_rbuf(sk);
1586                 release_sock(sk);
1587                 schedule();
1588                 sk->inuse = 1;
1589 
                     /* An unblocked signal is pending: give up the read. */
1590                 if (current->signal & ~current->blocked) 
1591                 {
1592                         copied = -ERESTARTSYS;
1593                         break;
1594                 }
1595                 continue;
1596 
1597         found_ok_skb:
1598                 /* Ok so how much can we use ? */
1599                 used = skb->len - offset;
1600                 if (len < used)
1601                         used = len;
1602                 /* do we have urgent data here? */
1603                 if (sk->urg_data) 
1604                 {
                             /* Distance from the read position to the urgent byte. */
1605                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1606                         if (urg_offset < used) 
1607                         {
1608                                 if (!urg_offset) 
1609                                 {
                                             /*
                                              * Positioned exactly on the urgent byte:
                                              * step over it unless it is delivered inline.
                                              */
1610                                         if (!sk->urginline) 
1611                                         {
1612                                                 ++*seq;
1613                                                 offset++;
1614                                                 used--;
1615                                         }
1616                                 }
1617                                 else
                                             /* Copy only up to the urgent byte this round. */
1618                                         used = urg_offset;
1619                         }
1620                 }
1621                 /* Copy it */
1622                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1623                         skb->h.th->doff*4 + offset, used);
1624                 copied += used;
1625                 len -= used;
1626                 to += used;
1627                 *seq += used;
                     /*
                      * Tested against sk->copied_seq rather than *seq, so a
                      * MSG_PEEK read does not clear the urgent state.
                      */
1628                 if (after(sk->copied_seq+1,sk->urg_seq))
1629                         sk->urg_data = 0;
                     /* Buffer exhausted by a real (non-peek) read: mark it used. */
1630                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1631                         skb->used = 1;
1632         }
1633         remove_wait_queue(sk->sleep, &wait);
1634         current->state = TASK_RUNNING;
1635 
1636         /* Clean up data we have read: This will do ACK frames */
1637         cleanup_rbuf(sk);
1638         release_sock(sk);
1639         return copied;
1640 }
1641 
1642  
1643 /*
1644  *      Shutdown the sending side of a connection.
1645  */
1646 
1647 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1648 {
1649         struct sk_buff *buff;
1650         struct tcphdr *t1, *th;
1651         struct proto *prot;
1652         int tmp;
1653         struct device *dev = NULL;
1654 
1655         /*
1656          * We need to grab some memory, and put together a FIN,
1657          * and then put it into the queue to be sent.
1658          * FIXME:
1659          *
1660          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1661          *      Most of this is guesswork, so maybe it will work...
1662          */
1663 
1664         if (!(how & SEND_SHUTDOWN)) 
1665                 return;
1666          
1667         /*
1668          *      If we've already sent a FIN, return. 
1669          */
1670          
1671         if (sk->state == TCP_FIN_WAIT1 ||
1672             sk->state == TCP_FIN_WAIT2 ||
1673             sk->state == TCP_CLOSING ||
1674             sk->state == TCP_LAST_ACK ||
1675             sk->state == TCP_TIME_WAIT
1676         ) 
1677         {
1678                 return;
1679         }
1680         sk->inuse = 1;
1681 
1682         /*
1683          * flag that the sender has shutdown
1684          */
1685 
1686         sk->shutdown |= SEND_SHUTDOWN;
1687 
1688         /*
1689          *  Clear out any half completed packets. 
1690          */
1691 
1692         if (sk->partial)
1693                 tcp_send_partial(sk);
1694 
1695         prot =(struct proto *)sk->prot;
1696         th =(struct tcphdr *)&sk->dummy_th;
1697         release_sock(sk); /* incase the malloc sleeps. */
1698         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1699         if (buff == NULL)
1700                 return;
1701         sk->inuse = 1;
1702 
1703         buff->sk = sk;
1704         buff->len = sizeof(*t1);
1705         buff->localroute = sk->localroute;
1706         t1 =(struct tcphdr *) buff->data;
1707 
1708         /*
1709          *      Put in the IP header and routing stuff. 
1710          */
1711 
1712         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1713                            IPPROTO_TCP, sk->opt,
1714                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1715         if (tmp < 0) 
1716         {
1717                 /*
1718                  *      Finish anyway, treat this as a send that got lost. 
1719                  *
1720                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1721                  *      written data to be completely acknowledged along
1722                  *      with an acknowledge to our FIN.
1723                  *
1724                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1725                  *      connection established.
1726                  */
1727                 buff->free=1;
1728                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1729 
1730                 if (sk->state == TCP_ESTABLISHED)
1731                         tcp_set_state(sk,TCP_FIN_WAIT1);
1732                 else if(sk->state == TCP_CLOSE_WAIT)
1733                         tcp_set_state(sk,TCP_LAST_ACK);
1734                 else
1735                         tcp_set_state(sk,TCP_FIN_WAIT2);
1736 
1737                 release_sock(sk);
1738                 return;
1739         }
1740 
1741         t1 =(struct tcphdr *)((char *)t1 +tmp);
1742         buff->len += tmp;
1743         buff->dev = dev;
1744         memcpy(t1, th, sizeof(*t1));
1745         t1->seq = ntohl(sk->write_seq);
1746         sk->write_seq++;
1747         buff->h.seq = sk->write_seq;
1748         t1->ack = 1;
1749         t1->ack_seq = ntohl(sk->acked_seq);
1750         t1->window = ntohs(sk->window=tcp_select_window(sk));
1751         t1->fin = 1;
1752         t1->rst = 0;
1753         t1->doff = sizeof(*t1)/4;
1754         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1755 
1756         /*
1757          * If there is data in the write queue, the fin must be appended to
1758          * the write queue.
1759          */
1760         
1761         if (skb_peek(&sk->write_queue) != NULL) 
1762         {
1763                 buff->free=0;
1764                 if (buff->next != NULL) 
1765                 {
1766                         printk("tcp_shutdown: next != NULL\n");
1767                         skb_unlink(buff);
1768                 }
1769                 skb_queue_tail(&sk->write_queue, buff);
1770         } 
1771         else 
1772         {
1773                 sk->sent_seq = sk->write_seq;
1774                 sk->prot->queue_xmit(sk, dev, buff, 0);
1775         }
1776 
1777         if (sk->state == TCP_ESTABLISHED) 
1778                 tcp_set_state(sk,TCP_FIN_WAIT1);
1779         else if (sk->state == TCP_CLOSE_WAIT)
1780                 tcp_set_state(sk,TCP_LAST_ACK);
1781         else
1782                 tcp_set_state(sk,TCP_FIN_WAIT2);
1783 
1784         release_sock(sk);
1785 }
1786 
1787 
1788 static int
1789 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1790              int to_len, int nonblock, unsigned flags,
1791              struct sockaddr_in *addr, int *addr_len)
1792 {
1793         int result;
1794   
1795         /* 
1796          *      Have to check these first unlike the old code. If 
1797          *      we check them after we lose data on an error
1798          *      which is wrong 
1799          */
1800 
1801         if(addr_len)
1802                 *addr_len = sizeof(*addr);
1803         result=tcp_read(sk, to, to_len, nonblock, flags);
1804 
1805         if (result < 0) 
1806                 return(result);
1807   
1808         if(addr)
1809         {
1810                 addr->sin_family = AF_INET;
1811                 addr->sin_port = sk->dummy_th.dest;
1812                 addr->sin_addr.s_addr = sk->daddr;
1813         }
1814         return(result);
1815 }
1816 
1817 
1818 /*
1819  *      This routine will send an RST to the other tcp. 
1820  */
1821  
1822 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1823           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1824 {
1825         struct sk_buff *buff;
1826         struct tcphdr *t1;
1827         int tmp;
1828         struct device *ndev=NULL;
1829   
1830 /*
1831  * We need to grab some memory, and put together an RST,
1832  * and then put it into the queue to be sent.
1833  */
1834 
1835         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1836         if (buff == NULL) 
1837                 return;
1838 
1839         buff->len = sizeof(*t1);
1840         buff->sk = NULL;
1841         buff->dev = dev;
1842         buff->localroute = 0;
1843 
1844         t1 =(struct tcphdr *) buff->data;
1845 
1846         /*
1847          *      Put in the IP header and routing stuff. 
1848          */
1849 
1850         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1851                            sizeof(struct tcphdr),tos,ttl);
1852         if (tmp < 0) 
1853         {
1854                 buff->free = 1;
1855                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1856                 return;
1857         }
1858 
1859         t1 =(struct tcphdr *)((char *)t1 +tmp);
1860         buff->len += tmp;
1861         memcpy(t1, th, sizeof(*t1));
1862 
1863         /*
1864          *      Swap the send and the receive. 
1865          */
1866 
1867         t1->dest = th->source;
1868         t1->source = th->dest;
1869         t1->rst = 1;  
1870         t1->window = 0;
1871   
1872         if(th->ack)
1873         {
1874                 t1->ack = 0;
1875                 t1->seq = th->ack_seq;
1876                 t1->ack_seq = 0;
1877         }
1878         else
1879         {
1880                 t1->ack = 1;
1881                 if(!th->syn)
1882                         t1->ack_seq=htonl(th->seq);
1883                 else
1884                         t1->ack_seq=htonl(th->seq+1);
1885                 t1->seq=0;
1886         }
1887 
1888         t1->syn = 0;
1889         t1->urg = 0;
1890         t1->fin = 0;
1891         t1->psh = 0;
1892         t1->doff = sizeof(*t1)/4;
1893         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1894         prot->queue_xmit(NULL, ndev, buff, 1);
1895         tcp_statistics.TcpOutSegs++;
1896 }
1897 
1898 
1899 /*
1900  *      Look for tcp options. Parses everything but only knows about MSS.
1901  *      This routine is always called with the packet containing the SYN.
1902  *      However it may also be called with the ack to the SYN.  So you
1903  *      can't assume this is always the SYN.  It's always called after
1904  *      we have set up sk->mtu to our own MTU.
1905  */
1906  
1907 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1908 {
1909         unsigned char *ptr;
1910         int length=(th->doff*4)-sizeof(struct tcphdr);
1911         int mss_seen = 0;
1912     
1913         ptr = (unsigned char *)(th + 1);
1914   
1915         while(length>0)
1916         {
1917                 int opcode=*ptr++;
1918                 int opsize=*ptr++;
1919                 switch(opcode)
1920                 {
1921                         case TCPOPT_EOL:
1922                                 return;
1923                         case TCPOPT_NOP:
1924                                 length-=2;
1925                                 continue;
1926                         
1927                         default:
1928                                 if(opsize<=2)   /* Avoid silly options looping forever */
1929                                         return;
1930                                 switch(opcode)
1931                                 {
1932                                         case TCPOPT_MSS:
1933                                                 if(opsize==4 && th->syn)
1934                                                 {
1935                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1936                                                         mss_seen = 1;
1937                                                 }
1938                                                 break;
1939                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1940                                 }
1941                                 ptr+=opsize-2;
1942                                 length-=opsize;
1943                 }
1944         }
1945         if (th->syn) 
1946         {
1947                 if (! mss_seen)
1948                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1949         }
1950 #ifdef CONFIG_INET_PCTCP
1951         sk->mss = min(sk->max_window >> 1, sk->mtu);
1952 #else    
1953         sk->mss = min(sk->max_window, sk->mtu);
1954 #endif  
1955 }
1956 
/*
 *      Return the classful network mask, in network byte order, for the
 *      address 'dst' (also in network byte order): the class A mask for
 *      class A addresses, the class B mask for class B addresses and the
 *      class C mask for everything else.
 */
static inline unsigned long default_mask(unsigned long dst)
{
        unsigned long host = ntohl(dst);
        unsigned long mask = IN_CLASSC_NET;

        if (IN_CLASSA(host))
                mask = IN_CLASSA_NET;
        else if (IN_CLASSB(host))
                mask = IN_CLASSB_NET;

        return htonl(mask);
}
1966 
1967 /*
1968  *      Default sequence number picking algorithm.
1969  */
1970 
1971 extern inline long tcp_init_seq(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1972 {
1973         return jiffies * SEQ_TICK - seq_offset; 
1974 }
1975 
1976 /*
1977  *      This routine handles a connection request.
1978  *      It should make sure we haven't already responded.
1979  *      Because of the way BSD works, we have to send a syn/ack now.
1980  *      This also means it will be harder to close a socket which is
1981  *      listening.
1982  */
1983  
1984 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1985                  unsigned long daddr, unsigned long saddr,
1986                  struct options *opt, struct device *dev, unsigned long seq)
1987 {
1988         struct sk_buff *buff;
1989         struct tcphdr *t1;
1990         unsigned char *ptr;
1991         struct sock *newsk;
1992         struct tcphdr *th;
1993         struct device *ndev=NULL;
1994         int tmp;
1995         struct rtable *rt;
1996   
1997         th = skb->h.th;
1998 
1999         /* If the socket is dead, don't accept the connection. */
2000         if (!sk->dead) 
2001         {
2002                 sk->data_ready(sk,0);
2003         }
2004         else 
2005         {
2006                 if(sk->debug)
2007                         printk("Reset on %p: Connect on dead socket.\n",sk);
2008                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
2009                 tcp_statistics.TcpAttemptFails++;
2010                 kfree_skb(skb, FREE_READ);
2011                 return;
2012         }
2013 
2014         /*
2015          * Make sure we can accept more.  This will prevent a
2016          * flurry of syns from eating up all our memory.
2017          */
2018 
2019         if (sk->ack_backlog >= sk->max_ack_backlog) 
2020         {
2021                 tcp_statistics.TcpAttemptFails++;
2022                 kfree_skb(skb, FREE_READ);
2023                 return;
2024         }
2025 
2026         /*
2027          * We need to build a new sock struct.
2028          * It is sort of bad to have a socket without an inode attached
2029          * to it, but the wake_up's will just wake up the listening socket,
2030          * and if the listening socket is destroyed before this is taken
2031          * off of the queue, this will take care of it.
2032          */
2033 
2034         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
2035         if (newsk == NULL) 
2036         {
2037                 /* just ignore the syn.  It will get retransmitted. */
2038                 tcp_statistics.TcpAttemptFails++;
2039                 kfree_skb(skb, FREE_READ);
2040                 return;
2041         }
2042 
2043         memcpy(newsk, sk, sizeof(*newsk));
2044         skb_queue_head_init(&newsk->write_queue);
2045         skb_queue_head_init(&newsk->receive_queue);
2046         newsk->send_head = NULL;
2047         newsk->send_tail = NULL;
2048         skb_queue_head_init(&newsk->back_log);
2049         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2050         newsk->rto = TCP_TIMEOUT_INIT;
2051         newsk->mdev = 0;
2052         newsk->max_window = 0;
2053         newsk->cong_window = 1;
2054         newsk->cong_count = 0;
2055         newsk->ssthresh = 0;
2056         newsk->backoff = 0;
2057         newsk->blog = 0;
2058         newsk->intr = 0;
2059         newsk->proc = 0;
2060         newsk->done = 0;
2061         newsk->partial = NULL;
2062         newsk->pair = NULL;
2063         newsk->wmem_alloc = 0;
2064         newsk->rmem_alloc = 0;
2065         newsk->localroute = sk->localroute;
2066 
2067         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2068 
2069         newsk->err = 0;
2070         newsk->shutdown = 0;
2071         newsk->ack_backlog = 0;
2072         newsk->acked_seq = skb->h.th->seq+1;
2073         newsk->fin_seq = skb->h.th->seq;
2074         newsk->copied_seq = skb->h.th->seq;
2075         newsk->state = TCP_SYN_RECV;
2076         newsk->timeout = 0;
2077         newsk->write_seq = seq; 
2078         newsk->window_seq = newsk->write_seq;
2079         newsk->rcv_ack_seq = newsk->write_seq;
2080         newsk->urg_data = 0;
2081         newsk->retransmits = 0;
2082         newsk->destroy = 0;
2083         init_timer(&newsk->timer);
2084         newsk->timer.data = (unsigned long)newsk;
2085         newsk->timer.function = &net_timer;
2086         newsk->dummy_th.source = skb->h.th->dest;
2087         newsk->dummy_th.dest = skb->h.th->source;
2088         
2089         /*
2090          *      Swap these two, they are from our point of view. 
2091          */
2092          
2093         newsk->daddr = saddr;
2094         newsk->saddr = daddr;
2095 
2096         put_sock(newsk->num,newsk);
2097         newsk->dummy_th.res1 = 0;
2098         newsk->dummy_th.doff = 6;
2099         newsk->dummy_th.fin = 0;
2100         newsk->dummy_th.syn = 0;
2101         newsk->dummy_th.rst = 0;        
2102         newsk->dummy_th.psh = 0;
2103         newsk->dummy_th.ack = 0;
2104         newsk->dummy_th.urg = 0;
2105         newsk->dummy_th.res2 = 0;
2106         newsk->acked_seq = skb->h.th->seq + 1;
2107         newsk->copied_seq = skb->h.th->seq;
2108         newsk->socket = NULL;
2109 
2110         /*
2111          *      Grab the ttl and tos values and use them 
2112          */
2113 
2114         newsk->ip_ttl=sk->ip_ttl;
2115         newsk->ip_tos=skb->ip_hdr->tos;
2116 
2117         /*
2118          *      Use 512 or whatever user asked for 
2119          */
2120 
2121         /*
2122          *      Note use of sk->user_mss, since user has no direct access to newsk 
2123          */
2124 
2125         rt=ip_rt_route(saddr, NULL,NULL);
2126         
2127         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2128                 newsk->window_clamp = rt->rt_window;
2129         else
2130                 newsk->window_clamp = 0;
2131                 
2132         if (sk->user_mss)
2133                 newsk->mtu = sk->user_mss;
2134         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2135                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2136         else 
2137         {
2138 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2139                 if ((saddr ^ daddr) & default_mask(saddr))
2140 #else
2141                 if ((saddr ^ daddr) & dev->pa_mask)
2142 #endif
2143                         newsk->mtu = 576 - HEADER_SIZE;
2144                 else
2145                         newsk->mtu = MAX_WINDOW;
2146         }
2147 
2148         /*
2149          *      But not bigger than device MTU 
2150          */
2151 
2152         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2153 
2154         /*
2155          *      This will min with what arrived in the packet 
2156          */
2157 
2158         tcp_options(newsk,skb->h.th);
2159 
2160         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2161         if (buff == NULL) 
2162         {
2163                 sk->err = -ENOMEM;
2164                 newsk->dead = 1;
2165                 release_sock(newsk);
2166                 kfree_skb(skb, FREE_READ);
2167                 tcp_statistics.TcpAttemptFails++;
2168                 return;
2169         }
2170   
2171         buff->len = sizeof(struct tcphdr)+4;
2172         buff->sk = newsk;
2173         buff->localroute = newsk->localroute;
2174 
2175         t1 =(struct tcphdr *) buff->data;
2176 
2177         /*
2178          *      Put in the IP header and routing stuff. 
2179          */
2180 
2181         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2182                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2183 
2184         /*
2185          *      Something went wrong. 
2186          */
2187 
2188         if (tmp < 0) 
2189         {
2190                 sk->err = tmp;
2191                 buff->free=1;
2192                 kfree_skb(buff,FREE_WRITE);
2193                 newsk->dead = 1;
2194                 release_sock(newsk);
2195                 skb->sk = sk;
2196                 kfree_skb(skb, FREE_READ);
2197                 tcp_statistics.TcpAttemptFails++;
2198                 return;
2199         }
2200 
2201         buff->len += tmp;
2202         t1 =(struct tcphdr *)((char *)t1 +tmp);
2203   
2204         memcpy(t1, skb->h.th, sizeof(*t1));
2205         buff->h.seq = newsk->write_seq;
2206         /*
2207          *      Swap the send and the receive. 
2208          */
2209         t1->dest = skb->h.th->source;
2210         t1->source = newsk->dummy_th.source;
2211         t1->seq = ntohl(newsk->write_seq++);
2212         t1->ack = 1;
2213         newsk->window = tcp_select_window(newsk);
2214         newsk->sent_seq = newsk->write_seq;
2215         t1->window = ntohs(newsk->window);
2216         t1->res1 = 0;
2217         t1->res2 = 0;
2218         t1->rst = 0;
2219         t1->urg = 0;
2220         t1->psh = 0;
2221         t1->syn = 1;
2222         t1->ack_seq = ntohl(skb->h.th->seq+1);
2223         t1->doff = sizeof(*t1)/4+1;
2224         ptr =(unsigned char *)(t1+1);
2225         ptr[0] = 2;
2226         ptr[1] = 4;
2227         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2228         ptr[3] =(newsk->mtu) & 0xff;
2229 
2230         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2231         newsk->prot->queue_xmit(newsk, ndev, buff, 0);
2232 
2233         reset_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
2234         skb->sk = newsk;
2235 
2236         /*
2237          *      Charge the sock_buff to newsk. 
2238          */
2239          
2240         sk->rmem_alloc -= skb->mem_len;
2241         newsk->rmem_alloc += skb->mem_len;
2242         
2243         skb_queue_tail(&sk->receive_queue,skb);
2244         sk->ack_backlog++;
2245         release_sock(newsk);
2246         tcp_statistics.TcpOutSegs++;
2247 }
2248 
2249 
2250 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2251 {
2252         struct sk_buff *buff;
2253         struct tcphdr *t1, *th;
2254         struct proto *prot;
2255         struct device *dev=NULL;
2256         int tmp;
2257 
2258         /*
2259          * We need to grab some memory, and put together a FIN, 
2260          * and then put it into the queue to be sent.
2261          */
2262         sk->inuse = 1;
2263         sk->keepopen = 1;
2264         sk->shutdown = SHUTDOWN_MASK;
2265 
2266         if (!sk->dead) 
2267                 sk->state_change(sk);
2268 
2269         if (timeout == 0) 
2270         {
2271                 /*
2272                  *  We need to flush the recv. buffs.  We do this only on the
2273                  *  descriptor close, not protocol-sourced closes, because the
2274                  *  reader process may not have drained the data yet!
2275                  */
2276 
2277                 if (skb_peek(&sk->receive_queue) != NULL) 
2278                 {
2279                         struct sk_buff *skb;
2280                         if(sk->debug)
2281                                 printk("Clean rcv queue\n");
2282                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2283                                 kfree_skb(skb, FREE_READ);
2284                         if(sk->debug)
2285                                 printk("Cleaned.\n");
2286                 }
2287         }
2288 
2289         /*
2290          *      Get rid off any half-completed packets. 
2291          */
2292          
2293         if (sk->partial) 
2294         {
2295                 tcp_send_partial(sk);
2296         }
2297 
2298         switch(sk->state) 
2299         {
2300                 case TCP_FIN_WAIT1:
2301                 case TCP_FIN_WAIT2:
2302                 case TCP_CLOSING:
2303                         /*
2304                          * These states occur when we have already closed out
2305                          * our end.  If there is no timeout, we do not do
2306                          * anything.  We may still be in the middle of sending
2307                          * the remainder of our buffer, for example...
2308                          * resetting the timer would be inappropriate.
2309                          *
2310                          * XXX if retransmit count reaches limit, is tcp_close()
2311                          * called with timeout == 1 ? if not, we need to fix that.
2312                          */
2313                         if (!timeout) {
2314                                 int timer_active;
2315 
2316                                 timer_active = del_timer(&sk->timer);
2317                                 if (timer_active)
2318                                         add_timer(&sk->timer);
2319                                 else
2320                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2321                         }
2322                         if (timeout) 
2323                                 tcp_time_wait(sk);
2324                         release_sock(sk);
2325                         return; /* break causes a double release - messy */
2326                 case TCP_TIME_WAIT:
2327                 case TCP_LAST_ACK:
2328                         /*
2329                          * A timeout from these states terminates the TCB.
2330                          */
2331                         if (timeout) 
2332                         {
2333                                 tcp_set_state(sk,TCP_CLOSE);
2334                         }
2335                         release_sock(sk);
2336                         return;
2337                 case TCP_LISTEN:
2338                         /* we need to drop any sockets which have been connected,
2339                            but have not yet been accepted. */
2340                         tcp_set_state(sk,TCP_CLOSE);
2341                         tcp_close_pending(sk, timeout);
2342                         release_sock(sk);
2343                         return;
2344                 case TCP_CLOSE:
2345                         release_sock(sk);
2346                         return;
2347                 case TCP_CLOSE_WAIT:
2348                 case TCP_ESTABLISHED:
2349                 case TCP_SYN_SENT:
2350                 case TCP_SYN_RECV:
2351                         prot =(struct proto *)sk->prot;
2352                         th =(struct tcphdr *)&sk->dummy_th;
2353                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2354                         if (buff == NULL) 
2355                         {
2356                                 /* This will force it to try again later. */
2357                                 /* Or it would have if someone released the socket
2358                                    first. Anyway it might work now */
2359                                 release_sock(sk);
2360                                 if (sk->state != TCP_CLOSE_WAIT)
2361                                         tcp_set_state(sk,TCP_ESTABLISHED);
2362                                 reset_timer(sk, TIME_CLOSE, 100);
2363                                 return;
2364                         }
2365                         buff->sk = sk;
2366                         buff->free = 1;
2367                         buff->len = sizeof(*t1);
2368                         buff->localroute = sk->localroute;
2369                         t1 =(struct tcphdr *) buff->data;
2370         
2371                         /*
2372                          *      Put in the IP header and routing stuff. 
2373                          */
2374                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2375                                          IPPROTO_TCP, sk->opt,
2376                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2377                         if (tmp < 0) 
2378                         {
2379                                 sk->write_seq++;        /* Very important 8) */
2380                                 kfree_skb(buff,FREE_WRITE);
2381 
2382                                 /*
2383                                  * Enter FIN_WAIT1 to await completion of
2384                                  * written out data and ACK to our FIN.
2385                                  */
2386 
2387                                 if(sk->state==TCP_ESTABLISHED)
2388                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2389                                 else
2390                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2391                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2392                                 if(timeout)
2393                                         tcp_time_wait(sk);
2394 
2395                                 release_sock(sk);
2396                                 return;
2397                         }
2398 
2399                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2400                         buff->len += tmp;
2401                         buff->dev = dev;
2402                         memcpy(t1, th, sizeof(*t1));
2403                         t1->seq = ntohl(sk->write_seq);
2404                         sk->write_seq++;
2405                         buff->h.seq = sk->write_seq;
2406                         t1->ack = 1;
2407         
2408                         /* 
2409                          *      Ack everything immediately from now on. 
2410                          */
2411 
2412                         sk->delay_acks = 0;
2413                         t1->ack_seq = ntohl(sk->acked_seq);
2414                         t1->window = ntohs(sk->window=tcp_select_window(sk));
2415                         t1->fin = 1;
2416                         t1->rst = 0;
2417                         t1->doff = sizeof(*t1)/4;
2418                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2419 
2420                         tcp_statistics.TcpOutSegs++;
2421         
2422                         if (skb_peek(&sk->write_queue) == NULL) 
2423                         {
2424                                 sk->sent_seq = sk->write_seq;
2425                                 prot->queue_xmit(sk, dev, buff, 0);
2426                         } 
2427                         else 
2428                         {
2429                                 reset_timer(sk, TIME_WRITE, sk->rto);
2430                                 if (buff->next != NULL) 
2431                                 {
2432                                         printk("tcp_close: next != NULL\n");
2433                                         skb_unlink(buff);
2434                                 }
2435                                 skb_queue_tail(&sk->write_queue, buff);
2436                         }
2437 
2438                         /*
2439                          * If established (normal close), enter FIN_WAIT1.
2440                          * If in CLOSE_WAIT, enter LAST_ACK
2441                          * If in CLOSING, remain in CLOSING
2442                          * otherwise enter FIN_WAIT2
2443                          */
2444 
2445                         if (sk->state == TCP_ESTABLISHED)
2446                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2447                         else if (sk->state == TCP_CLOSE_WAIT)
2448                                 tcp_set_state(sk,TCP_LAST_ACK);
2449                         else if (sk->state != TCP_CLOSING)
2450                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2451         }
2452         release_sock(sk);
2453 }
2454 
2455 
2456 /*
2457  * This routine takes stuff off of the write queue,
2458  * and puts it in the xmit queue.
2459  */
2460 static void
2461 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2462 {
2463         struct sk_buff *skb;
2464 
2465         /*
2466          *      The bytes will have to remain here. In time closedown will
2467          *      empty the write queue and all will be happy 
2468          */
2469 
2470         if(sk->zapped)
2471                 return;
2472 
2473         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2474                 before(skb->h.seq, sk->window_seq + 1) &&
2475                 (sk->retransmits == 0 ||
2476                  sk->timeout != TIME_WRITE ||
2477                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2478                 && sk->packets_out < sk->cong_window) 
2479         {
2480                 IS_SKB(skb);
2481                 skb_unlink(skb);
2482                 /* See if we really need to send the packet. */
2483                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2484                 {
2485                         sk->retransmits = 0;
2486                         kfree_skb(skb, FREE_WRITE);
2487                         if (!sk->dead) 
2488                                 sk->write_space(sk);
2489                 } 
2490                 else
2491                 {
2492                         struct tcphdr *th;
2493                         struct iphdr *iph;
2494                         int size;
2495 /*
2496  * put in the ack seq and window at this point rather than earlier,
2497  * in order to keep them monotonic.  We really want to avoid taking
2498  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2499  * Ack and window will in general have changed since this packet was put
2500  * on the write queue.
2501  */
2502                         iph = (struct iphdr *)(skb->data +
2503                                                skb->dev->hard_header_len);
2504                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2505                         size = skb->len - (((unsigned char *) th) - skb->data);
2506                         
2507                         th->ack_seq = ntohl(sk->acked_seq);
2508                         th->window = ntohs(tcp_select_window(sk));
2509 
2510                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2511 
2512                         sk->sent_seq = skb->h.seq;
2513                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2514                 }
2515         }
2516 }
2517 
2518 
2519 /*
2520  *      This routine deals with incoming acks, but not outgoing ones.
2521  */
2522 
     /*
      *      tcp_ack - process the acknowledgement carried by an incoming segment.
      *
      *      sk    - socket the segment belongs to; its send list, write queue,
      *              timers, RTT estimate, congestion window and state are all
      *              updated from here.
      *      th    - TCP header of the incoming segment (ack_seq, window and
      *              doff are read).
      *      saddr - not referenced anywhere in this function.
      *      len   - compared against th->doff*4; a difference is recorded in
      *              flag bit 1 (see the flag description below).
      *
      *      Returns 0 when the ack is rejected: it acknowledges something
      *      after sk->sent_seq, or it is older than sk->rcv_ack_seq while the
      *      socket is in neither ESTABLISHED nor CLOSE_WAIT.  Returns 1 in
      *      every other case, including the early exit for a zapped socket.
      */
2523 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2524 {
2525         unsigned long ack;
2526         int flag = 0;
2527 
2528         /* 
2529          * 1 - there was data in packet as well as ack or new data is sent or 
2530          *     in shutdown state
2531          * 2 - data from retransmit queue was acked and removed
2532          * 4 - window shrunk or data from retransmit queue was acked and removed
2533          */
2534 
2535         if(sk->zapped)
2536                 return(1);      /* Dead, can't ack any more so why bother */
2537 
2538         ack = ntohl(th->ack_seq);
             /*
              * Remember the largest window the peer has advertised so far and
              * re-derive mss from it (half of it when CONFIG_INET_PCTCP is set),
              * never exceeding the mtu.
              */
2539         if (ntohs(th->window) > sk->max_window) 
2540         {
2541                 sk->max_window = ntohs(th->window);
2542 #ifdef CONFIG_INET_PCTCP
2543                 sk->mss = min(sk->max_window>>1, sk->mtu);
2544 #else
2545                 sk->mss = min(sk->max_window, sk->mtu);
2546 #endif  
2547         }
2548 
             /* Any ack seen while the keepalive timer is the active one clears the retransmit count. */
2549         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2550                 sk->retransmits = 0;
2551 
             /*
              * An ack outside [rcv_ack_seq, sent_seq] carries no new information.
              * It is either rejected (return 0) or, for an old ack on an
              * ESTABLISHED/CLOSE_WAIT socket, used only to restart a running
              * keepalive timer.
              */
2552         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2553         {
2554                 if(sk->debug)
2555                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2556                         
2557                 /*
2558                  *      Keepalive processing.
2559                  */
2560                  
2561                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2562                 {
2563                         return(0);
2564                 }
2565                 if (sk->keepopen) 
2566                 {
2567                         if(sk->timeout==TIME_KEEPOPEN)
2568                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2569                 }
2570                 return(1);
2571         }
2572 
             /* Segment length differs from the TCP header length: note it in flag bit 1. */
2573         if (len != th->doff*4) 
2574                 flag |= 1;
2575 
2576         /* See if our window has been shrunk. */
2577 
2578         if (after(sk->window_seq, ack+ntohs(th->window))) 
2579         {
2580                 /*
2581                  * We may need to move packets from the send queue
2582                  * to the write queue, if the window has been shrunk on us.
2583                  * The RFC says you are not allowed to shrink your window
2584                  * like this, but if the other end does, you must be able
2585                  * to deal with it.
2586                  */
2587                 struct sk_buff *skb;
2588                 struct sk_buff *skb2;
2589                 struct sk_buff *wskb = NULL;
2590         
                     /* Detach the whole send list; it is rebuilt below from the segments that still fit. */
2591                 skb2 = sk->send_head;
2592                 sk->send_head = NULL;
2593                 sk->send_tail = NULL;
2594         
2595                 flag |= 4;
2596         
2597                 sk->window_seq = ack + ntohs(th->window);
                     /*
                      * Walk the old list (chained through link3) with interrupts off.
                      * Segments past the new window edge go back to the front of the
                      * write queue in their original order (wskb is the last one
                      * re-queued); the rest are re-appended to the send list.
                      */
2598                 cli();
2599                 while (skb2 != NULL) 
2600                 {
2601                         skb = skb2;
2602                         skb2 = skb->link3;
2603                         skb->link3 = NULL;
2604                         if (after(skb->h.seq, sk->window_seq)) 
2605                         {
2606                                 if (sk->packets_out > 0) 
2607                                         sk->packets_out--;
2608                                 /* We may need to remove this from the dev send list. */
2609                                 if (skb->next != NULL) 
2610                                 {
2611                                         skb_unlink(skb);                                
2612                                 }
2613                                 /* Now add it to the write_queue. */
2614                                 if (wskb == NULL)
2615                                         skb_queue_head(&sk->write_queue,skb);
2616                                 else
2617                                         skb_append(wskb,skb);
2618                                 wskb = skb;
2619                         } 
2620                         else 
2621                         {
2622                                 if (sk->send_head == NULL) 
2623                                 {
2624                                         sk->send_head = skb;
2625                                         sk->send_tail = skb;
2626                                 }
2627                                 else
2628                                 {
2629                                         sk->send_tail->link3 = skb;
2630                                         sk->send_tail = skb;
2631                                 }
2632                                 skb->link3 = NULL;
2633                         }
2634                 }
2635                 sti();
2636         }
2637 
             /* Normalise an empty send list: both ends NULL and nothing counted as outstanding. */
2638         if (sk->send_tail == NULL || sk->send_head == NULL) 
2639         {
2640                 sk->send_head = NULL;
2641                 sk->send_tail = NULL;
2642                 sk->packets_out= 0;
2643         }
2644 
             /* New right edge of the peer's window (set again here for the non-shrinking case). */
2645         sk->window_seq = ack + ntohs(th->window);
2646 
2647         /* We don't want too many packets out there. */
2648         if (sk->timeout == TIME_WRITE && 
2649                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2650         {
2651 /* 
2652  * This is Jacobson's slow start and congestion avoidance. 
2653  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2654  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2655  * counter and increment it once every cwnd times.  It's possible
2656  * that this should be done only if sk->retransmits == 0.  I'm
2657  * interpreting "new data is acked" as including data that has
2658  * been retransmitted but is just now being acked.
2659  */
2660                 if (sk->cong_window < sk->ssthresh)  
2661                   /* 
2662                    *    In "safe" area, increase
2663                    */
2664                         sk->cong_window++;
2665                 else 
2666                 {
2667                   /*
2668                    *    In dangerous area, increase slowly.  In theory this is
2669                    *    sk->cong_window += 1 / sk->cong_window
2670                    */
2671                         if (sk->cong_count >= sk->cong_window) 
2672                         {
2673                                 sk->cong_window++;
2674                                 sk->cong_count = 0;
2675                         }
2676                         else 
2677                                 sk->cong_count++;
2678                 }
2679         }
2680 
             /* From here on "ack" is the newest acknowledged sequence number for this socket. */
2681         sk->rcv_ack_seq = ack;
2682 
2683         /*
2684          * if this ack opens up a zero window, clear backoff.  It was
2685          * being used to time the probes, and is probably far higher than
2686          * it needs to be for normal retransmission.
2687          */
2688 
2689         if (sk->timeout == TIME_PROBE0) 
2690         {
2691                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2692                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2693                 {
2694                         sk->retransmits = 0;
2695                         sk->backoff = 0;
2696                   /*
2697                    *    Recompute rto from rtt.  this eliminates any backoff.
2698                    */
2699 
2700                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2701                         if (sk->rto > 120*HZ)
2702                                 sk->rto = 120*HZ;
2703                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2704                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2705                                                    .2 of a second is going to need huge windows (SIGH) */
2706                                 sk->rto = 20;
2707                 }
2708         }
2709 
2710   /* 
2711    *    See if we can take anything off of the retransmit queue.
2712    */
2713    
             /*
              * Pop segments off the head of the send list while their h.seq is
              * covered by this ack; stop at the first one that is not.
              */
2714         while(sk->send_head != NULL) 
2715         {
2716                 /* Check for a bug. */
2717                 if (sk->send_head->link3 &&
2718                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2719                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2720                 if (before(sk->send_head->h.seq, ack+1)) 
2721                 {
2722                         struct sk_buff *oskb;   
2723                         if (sk->retransmits) 
2724                         {       
2725                                 /*
2726                                  *      We were retransmitting.  don't count this in RTT est 
2727                                  */
2728                                 flag |= 2;
2729 
2730                                 /*
2731                                  * even though we've gotten an ack, we're still
2732                                  * retransmitting as long as we're sending from
2733                                  * the retransmit queue.  Keeping retransmits non-zero
2734                                  * prevents us from getting new data interspersed with
2735                                  * retransmissions.
2736                                  */
2737 
2738                                 if (sk->send_head->link3)
2739                                         sk->retransmits = 1;
2740                                 else
2741                                         sk->retransmits = 0;
2742                         }
2743                         /*
2744                          * Note that we only reset backoff and rto in the
2745                          * rtt recomputation code.  And that doesn't happen
2746                          * if there were retransmissions in effect.  So the
2747                          * first new packet after the retransmissions is
2748                          * sent with the backoff still in effect.  Not until
2749                          * we get an ack from a non-retransmitted packet do
2750                          * we reset the backoff and rto.  This allows us to deal
2751                          * with a situation where the network delay has increased
2752                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2753                          */
2754 
2755                         /*
2756                          *      We have one less packet out there. 
2757                          */
2758                          
2759                         if (sk->packets_out > 0) 
2760                                 sk->packets_out --;
2761                         /* 
2762                          *      Wake up the process, it can probably write more. 
2763                          */
2764                         if (!sk->dead) 
2765                                 sk->write_space(sk);
2766                         oskb = sk->send_head;
2767 
                             /*
                              * Bit 2 is still clear only for the first segment removed by
                              * this ack, and only if no retransmission was in progress, so
                              * at most one RTT sample is taken per ack.
                              */
2768                         if (!(flag&2)) 
2769                         {
2770                                 long m;
2771         
2772                                 /*
2773                                  *      The following amusing code comes from Jacobson's
2774                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2775                                  *      are scaled versions of rtt and mean deviation.
2776                                  *      This is designed to be as fast as possible 
2777                                  *      m stands for "measurement".
2778                                  */
2779         
2780                                 m = jiffies - oskb->when;  /* RTT */
2781                                 if(m<=0)
2782                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2783                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2784                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2785                                 if (m < 0)
2786                                         m = -m;         /* m is now abs(error) */
2787                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2788                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2789         
2790                                 /*
2791                                  *      Now update timeout.  Note that this removes any backoff.
2792                                  */
2793                          
2794                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2795                                 if (sk->rto > 120*HZ)
2796                                         sk->rto = 120*HZ;
2797                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2798                                         sk->rto = 20;
2799                                 sk->backoff = 0;
2800                         }
2801                         flag |= (2|4);
                             /* Re-read the head with interrupts off before unlinking it from the send list. */
2802                         cli();
2803                         oskb = sk->send_head;
2804                         IS_SKB(oskb);
2805                         sk->send_head = oskb->link3;
2806                         if (sk->send_head == NULL) 
2807                         {
2808                                 sk->send_tail = NULL;
2809                         }
2810 
2811                 /*
2812                  *      We may need to remove this from the dev send list. 
2813                  */
2814 
2815                         if (oskb->next)
2816                                 skb_unlink(oskb);
2817                         sti();
2818                         kfree_skb(oskb, FREE_WRITE); /* write. */
2819                         if (!sk->dead) 
2820                                 sk->write_space(sk);
2821                 }
2822                 else
2823                 {
2824                         break;
2825                 }
2826         }
2827 
2828         /*
2829          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2830          * returns non-NULL, we completely ignore the timer stuff in the else
2831          * clause.  We ought to organize the code so that else clause can
2832          * (should) be executed regardless, possibly moving the PROBE timer
2833          * reset over.  The skb_peek() thing should only move stuff to the
2834          * write queue, NOT also manage the timer functions.
2835          */
2836 
2837         /*
2838          * Maybe we can take some stuff off of the write queue,
2839          * and put it onto the xmit queue.
2840          */
2841         if (skb_peek(&sk->write_queue) != NULL) 
2842         {
                     /*
                      * First test: window, retransmit and congestion conditions on
                      * the head of the write queue all allow sending.  Otherwise, if
                      * the head lies beyond the window and nothing at all is
                      * outstanding, start the zero-window probe timer.
                      */
2843                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2844                         (sk->retransmits == 0 || 
2845                          sk->timeout != TIME_WRITE ||
2846                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2847                         && sk->packets_out < sk->cong_window) 
2848                 {
2849                         flag |= 1;
2850                         tcp_write_xmit(sk);
2851                 }
2852                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2853                         sk->send_head == NULL &&
2854                         sk->ack_backlog == 0 &&
2855                         sk->state != TCP_TIME_WAIT) 
2856                 {
2857                         reset_timer(sk, TIME_PROBE0, sk->rto);
2858                 }               
2859         }
2860         else
2861         {
2862                 /*
2863                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2864                  * from TCP_CLOSE we don't do anything
2865                  *
2866                  * from anything else, if there is write data (or fin) pending,
2867                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2868                  * a KEEPALIVE timeout, else we delete the timer.
2869                  *
2870                  * We do not set flag for nominal write data, otherwise we may
2871                  * force a state where we start to write itsy bitsy tidbits
2872                  * of data.
2873                  */
2874 
2875                 switch(sk->state) {
2876                 case TCP_TIME_WAIT:
2877                         /*
2878                          * keep us in TIME_WAIT until we stop getting packets,
2879                          * reset the timeout.
2880                          */
2881                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2882                         break;
2883                 case TCP_CLOSE:
2884                         /*
2885                          * don't touch the timer.
2886                          */
2887                         break;
2888                 default:
2889                         /*
2890                          * must check send_head, write_queue, and ack_backlog
2891                          * to determine which timeout to use.
2892                          */
2893                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2894                                 reset_timer(sk, TIME_WRITE, sk->rto);
2895                         } else if (sk->keepopen) {
2896                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2897                         } else {
2898                                 delete_timer(sk);
2899                         }
2900                         break;
2901                 }
                     /* The block below is compiled only when NOTDEF is defined; the switch above is the live timer logic. */
2902 #ifdef NOTDEF
2903                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2904                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2905                 {
2906                         if (!sk->dead)
2907                                 sk->write_space(sk);
2908                         if (sk->keepopen) {
2909                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2910                         } else {
2911                                 delete_timer(sk);
2912                         }
2913                 }
2914                 else
2915                 {
2916                         if (sk->state != (unsigned char) sk->keepopen) 
2917                         {
2918                                 reset_timer(sk, TIME_WRITE, sk->rto);
2919                         }
2920                         if (sk->state == TCP_TIME_WAIT) 
2921                         {
2922                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2923                         }       
2924                 }
2925 #endif
2926         }
2927 
             /* Nothing outstanding and nothing queued: a held-back partial segment can go out now. */
2928         if (sk->packets_out == 0 && sk->partial != NULL &&
2929                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2930         {
2931                 flag |= 1;
2932                 tcp_send_partial(sk);
2933         }
2934 
2935         /*
2936          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2937          * we are now waiting for an acknowledge to our FIN.  The other end is
2938          * already in TIME_WAIT.
2939          *
2940          * Move to TCP_CLOSE on success.
2941          */
2942 
             /*
              * NOTE(review): the comment above says the socket moves to TCP_CLOSE,
              * but the code below calls tcp_time_wait(sk) rather than
              * tcp_set_state(sk, TCP_CLOSE) - confirm which is intended.
              * In this state and the two that follow, state_change() is invoked
              * for every ack on a live socket, whether or not the state changes.
              */
2943         if (sk->state == TCP_LAST_ACK) 
2944         {
2945                 if (!sk->dead)
2946                         sk->state_change(sk);
2947                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2948                 {
2949                         flag |= 1;
2950                         tcp_time_wait(sk);
2951                         sk->shutdown = SHUTDOWN_MASK;
2952                 }
2953         }
2954 
2955         /*
2956          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2957          *
2958          * Move to FIN_WAIT2 to await a FIN from the other end. Set
2959          * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
2960          */
2961 
2962         if (sk->state == TCP_FIN_WAIT1) 
2963         {
2964 
2965                 if (!sk->dead) 
2966                         sk->state_change(sk);
2967                 if (sk->rcv_ack_seq == sk->write_seq) 
2968                 {
2969                         flag |= 1;
2970                         sk->shutdown |= SEND_SHUTDOWN;
2971                         tcp_set_state(sk, TCP_FIN_WAIT2);
2972                 }
2973         }
2974 
2975         /*
2976          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2977          *
2978          *      Move to TIME_WAIT
2979          */
2980 
2981         if (sk->state == TCP_CLOSING) 
2982         {
2983 
2984                 if (!sk->dead) 
2985                         sk->state_change(sk);
2986                 if (sk->rcv_ack_seq == sk->write_seq) 
2987                 {
2988                         flag |= 1;
2989                         tcp_time_wait(sk);
2990                 }
2991         }
2992 
2993         /*
2994          * I make no guarantees about the first clause in the following
2995          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2996          * what conditions "!flag" would be true.  However I think the rest
2997          * of the conditions would prevent that from causing any
2998          * unnecessary retransmission. 
2999          *   Clearly if the first packet has expired it should be 
3000          * retransmitted.  The other alternative, "flag&2 && retransmits", is
3001          * harder to explain:  You have to look carefully at how and when the
3002          * timer is set and with what timeout.  The most recent transmission always
3003          * sets the timer.  So in general if the most recent thing has timed
3004          * out, everything before it has as well.  So we want to go ahead and
3005          * retransmit some more.  If we didn't explicitly test for this
3006          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
3007          * would not be true.  If you look at the pattern of timing, you can
3008          * show that rto is increased fast enough that the next packet would
3009          * almost never be retransmitted immediately.  Then you'd end up
3010          * waiting for a timeout to send each packet on the retransmission
3011          * queue.  With my implementation of the Karn sampling algorithm,
3012          * the timeout would double each time.  The net result is that it would
3013          * take a hideous amount of time to recover from a single dropped packet.
3014          * It's possible that there should also be a test for TIME_WRITE, but
3015          * I think as long as "send_head != NULL" and "retransmit" is on, we've
3016          * got to be in real retransmission mode.
3017          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
3018          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
3019          * As long as no further losses occur, this seems reasonable.
3020          */
3021         
             /*
              * NOTE(review): "when + rto < jiffies" is a plain comparison; it
              * presumably gives the wrong answer around a jiffies wrap - confirm.
              * An expired head segment goes through tcp_retransmit(); otherwise
              * the list is resent via ip_do_retransmit() and the write timer is
              * restarted.
              */
3022         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3023                (((flag&2) && sk->retransmits) ||
3024                (sk->send_head->when + sk->rto < jiffies))) 
3025         {
3026                 if(sk->send_head->when + sk->rto < jiffies)
3027                         tcp_retransmit(sk,0);   
3028                 else
3029                 {
3030                         ip_do_retransmit(sk, 1);
3031                         reset_timer(sk, TIME_WRITE, sk->rto);
3032                 }
3033         }
3034 
3035         return(1);
3036 }
3037 
3038 
3039 /*
3040  *      This routine handles the data.  If there is room in the buffer,
3041  *      it will have already been moved into it.  If there is no
3042  *      room, then we will just have to discard the packet.
3043  */
3044 
/*
 *      tcp_data - place an incoming segment on sk->receive_queue in
 *      sequence order and advance sk->acked_seq over whatever has
 *      become contiguous.
 *
 *      skb:    the received segment; skb->h.th points at its TCP header.
 *      sk:     the receiving socket.
 *      saddr:  address handed on unchanged to tcp_send_ack().
 *      len:    segment length including the TCP header; the header
 *              length (th->doff*4) is subtracted to obtain skb->len.
 *
 *      Always returns 0.  The skb is either freed here (dataless
 *      segment, or data arriving past the shutdown point) or linked
 *      into sk->receive_queue.
 */
3045 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
3046          unsigned long saddr, unsigned short len)
3047 {
3048         struct sk_buff *skb1, *skb2;
3049         struct tcphdr *th;
3050         int dup_dumped=0;      /* set when skb replaced a queued duplicate */
3051         unsigned long new_seq; /* right edge of this segment (shutdown check only) */
3052         struct sk_buff *tail;  /* last skb on the receive queue (shutdown check only) */
3053         unsigned long shut_seq;        /* sequence number at which the socket was shut */
3054 
3055         th = skb->h.th;
3056         skb->len = len -(th->doff*4);
3057 
3058         /* The bytes in the receive read/assembly queue has increased. Needed for the
3059            low memory discard algorithm */
3060            
3061         sk->bytes_rcv += skb->len;
3062         
3063         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3064         {
3065                 /* 
3066                  *      Don't want to keep passing ack's back and forth. 
3067                  *      (someone sent us dataless, boring frame)
3068                  */
3069                 if (!th->ack)
3070                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3071                 kfree_skb(skb, FREE_READ);
3072                 return(0);
3073         }
3074         
3075         /*
3076          *      We no longer have anyone receiving data on this connection.
3077          */
3078 
3079         if(sk->shutdown & RCV_SHUTDOWN)
3080         {
3081                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3082                 
3083                 /*
3084                  *      This is subtle and not nice. When we shut down we can
3085                  *      have data in the queue and acked_seq therefore not
3086                  *      pointing to the last byte that will be read. Thus
3087                  *      the naive implementation:
3088                  *              after(new_seq,sk->acked_seq+1)
3089                  *      will cause bogus resets IFF a resend of a frame that has
3090                  *      been queued but not yet read after a shutdown has been done.
3091                  *      What we do now is a bit more complex but works as
3092                  *      follows. If the queue is empty copied_seq+1 is right (+1 for FIN)
3093                  *      if the queue has data the shutdown occurs at the right edge of
3094                  *      the last packet queued +1
3095                  *
3096                  *      We can't simply ack data beyond this point as it has
3097                  *      and will never be received by an application.
3098                  */
3099                 tail=skb_peek(&sk->receive_queue);
3100                 if(tail!=NULL)
3101                 {
3102                         tail=sk->receive_queue.prev;
3103                         shut_seq=tail->h.th->seq+tail->len+1;
3104                 }
3105                 else
3106                         shut_seq=sk->copied_seq+1;
3107                 
3108                 if(after(new_seq,shut_seq))
3109                 {
                         /*
                          *      Data past the shutdown point: answer with a reset,
                          *      move the socket to TCP_CLOSE with EPIPE and drop
                          *      the segment.
                          */
3110                         sk->acked_seq = new_seq + th->fin;
3111                         if(sk->debug)
3112                                 printk("Data arrived on %p after close [Data right edge %lX, Socket shut on %lX] %d\n",
3113                                         sk, new_seq, shut_seq, sk->blog);
3114                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3115                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3116                         tcp_statistics.TcpEstabResets++;
3117                         tcp_set_state(sk,TCP_CLOSE);
3118                         sk->err = EPIPE;
3119                         sk->shutdown = SHUTDOWN_MASK;
3120                         kfree_skb(skb, FREE_READ);
3121                         if (!sk->dead)
3122                                 sk->state_change(sk);
3123                         return(0);
3124                 }
3125         }
3126         /*
3127          *      Now we have to walk the chain, and figure out where this one
3128          *      goes into it.  This is set up so that the last packet we received
3129          *      will be the first one we look at, that way if everything comes
3130          *      in order, there will be no performance loss, and if they come
3131          *      out of order we will be able to fit things in nicely.
3132          */
3133 
3134         /* 
3135          *      This should start at the last one, and then go around forwards.
3136          */
3137 
3138         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3139         {
3140                 skb_queue_head(&sk->receive_queue,skb);
3141                 skb1= NULL;
3142         } 
3143         else
3144         {
                 /*
                  *      Walk backwards from the tail.  On exit skb1 is NULL if
                  *      skb replaced a duplicate; otherwise it is the queue
                  *      entry the walk stopped at (the skb we were appended
                  *      after, or the old head we were inserted in front of).
                  */
3145                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3146                 {
3147                         if(sk->debug)
3148                         {
3149                                 printk("skb1=%p :", skb1);
3150                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3151                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3152                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3153                                                 sk->acked_seq);
3154                         }
3155                         
3156                         /*
3157                          *      Optimisation: Duplicate frame or extension of previous frame from
3158                          *      same sequence point (lost ack case).
3159                          *      The frame contains duplicate data or replaces a previous frame
3160                          *      discard the previous frame (safe as sk->inuse is set) and put
3161                          *      the new one in its place.
3162                          */
3163                          
3164                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3165                         {
3166                                 skb_append(skb1,skb);
3167                                 skb_unlink(skb1);
3168                                 kfree_skb(skb1,FREE_READ);
3169                                 dup_dumped=1;
3170                                 skb1=NULL;
3171                                 break;
3172                         }
3173                         
3174                         /*
3175                          *      Found where it fits
3176                          */
3177                          
3178                         if (after(th->seq+1, skb1->h.th->seq))
3179                         {
3180                                 skb_append(skb1,skb);
3181                                 break;
3182                         }
3183                         
3184                         /*
3185                          *      See if we've hit the start. If so insert.
3186                          */
3187                         if (skb1 == skb_peek(&sk->receive_queue))
3188                         {
3189                                 skb_queue_head(&sk->receive_queue, skb);
3190                                 break;
3191                         }
3192                 }
3193         }
3194 
3195         /*
3196          *      Figure out what the ack value for this frame is
3197          */
         /*
          *      The header's ack_seq field is overwritten with the sequence
          *      number that follows this segment; the acking loop further
          *      down reads the same field back from skbs already queued.
          */
3198          
3199         th->ack_seq = th->seq + skb->len;
3200         if (th->syn) 
3201                 th->ack_seq++;
3202         if (th->fin)
3203                 th->ack_seq++;
3204 
3205         if (before(sk->acked_seq, sk->copied_seq)) 
3206         {
3207                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3208                 sk->acked_seq = sk->copied_seq;
3209         }
3210 
3211         /*
3212          *      Now figure out if we can ack anything.
3213          */
3214 
3215         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3216         {
3217                 if (before(th->seq, sk->acked_seq+1)) 
3218                 {
3219                         int newwindow;
3220 
                         /*
                          *      Advance acked_seq over this segment and shrink
                          *      sk->window by the same amount, never below zero.
                          */
3221                         if (after(th->ack_seq, sk->acked_seq)) 
3222                         {
3223                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3224                                 if (newwindow < 0)
3225                                         newwindow = 0;  
3226                                 sk->window = newwindow;
3227                                 sk->acked_seq = th->ack_seq;
3228                         }
3229                         skb->acked = 1;
3230 
3231                         /* 
3232                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3233                          */
3234 
3235                         if (skb->h.th->fin) 
3236                         {
3237                                 if (!sk->dead) 
3238                                         sk->state_change(sk);
3239                                 sk->shutdown |= RCV_SHUTDOWN;
3240                         }
3241           
                         /*
                          *      Sweep the skbs queued after this one and ack each
                          *      that is now covered by acked_seq; stop at the
                          *      first one that is not.
                          */
3242                         for(skb2 = skb->next;
3243                             skb2 != (struct sk_buff *)&sk->receive_queue;
3244                             skb2 = skb2->next) 
3245                         {
3246                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3247                                 {
3248                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3249                                         {
3250                                                 newwindow = sk->window -
3251                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3252                                                 if (newwindow < 0)
3253                                                         newwindow = 0;  
3254                                                 sk->window = newwindow;
3255                                                 sk->acked_seq = skb2->h.th->ack_seq;
3256                                         }
3257                                         skb2->acked = 1;
3258                                         /*
3259                                          *      When we ack the fin, we turn on
3260                                          *      the RCV_SHUTDOWN flag.
3261                                          */
3262                                         if (skb2->h.th->fin) 
3263                                         {
3264                                                 sk->shutdown |= RCV_SHUTDOWN;
3265                                                 if (!sk->dead)
3266                                                         sk->state_change(sk);
3267                                         }
3268 
3269                                         /*
3270                                          *      Force an immediate ack.
3271                                          */
3272                                          
3273                                         sk->ack_backlog = sk->max_ack_backlog;
3274                                 }
3275                                 else
3276                                 {
3277                                         break;
3278                                 }
3279                         }
3280 
3281                         /*
3282                          *      This also takes care of updating the window.
3283                          *      This if statement needs to be simplified.
3284                          */
                         /*
                          *      The immediate-ack branch is intentionally empty (its
                          *      tcp_send_ack call is commented out): skb->acked was
                          *      set above, so the ack goes out from the else-branch
                          *      of the "if (!skb->acked)" test further down.
                          */
3285                         if (!sk->delay_acks ||
3286                             sk->ack_backlog >= sk->max_ack_backlog || 
3287                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3288         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3289                         }
3290                         else 
3291                         {
3292                                 sk->ack_backlog++;
3293                                 if(sk->debug)
3294                                         printk("Ack queued.\n");
3295                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3296                         }
3297                 }
3298         }
3299 
3300         /*
3301          *      If we've missed a packet, send an ack.
3302          *      Also start a timer to send another.
3303          */
3304          
3305         if (!skb->acked) 
3306         {
3307         
3308         /*
3309          *      This is important.  If we don't have much room left,
3310          *      we need to throw out a few packets so we have a good
3311          *      window.  Note that mtu is used, not mss, because mss is really
3312          *      for the send side.  He could be sending us stuff as large as mtu.
3313          */
3314                  
                 /*
                  *      NOTE(review): the head of the queue may be the segment
                  *      just queued (skb) when it was inserted at the front and
                  *      is unacked; th, which points into skb, is still used by
                  *      tcp_send_ack() after the loop -- confirm this is safe.
                  */
3315                 while (sk->prot->rspace(sk) < sk->mtu) 
3316                 {
3317                         skb1 = skb_peek(&sk->receive_queue);
3318                         if (skb1 == NULL) 
3319                         {
3320                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3321                                 break;
3322                         }
3323 
3324                         /*
3325                          *      Don't throw out something that has been acked. 
3326                          */
3327                  
3328                         if (skb1->acked) 
3329                         {
3330                                 break;
3331                         }
3332                 
3333                         skb_unlink(skb1);
3334                         kfree_skb(skb1, FREE_READ);
3335                 }
3336                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3337                 sk->ack_backlog++;
3338                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3339         }
3340         else
3341         {
3342                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3343         }
3344 
3345         /*
3346          *      Now tell the user we may have some data. 
3347          */
3348          
3349         if (!sk->dead) 
3350         {
3351                 if(sk->debug)
3352                         printk("Data wakeup.\n");
3353                 sk->data_ready(sk,0);
3354         } 
3355         return(0);
3356 }
3357 
3358 
3359 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3360 {
3361         unsigned long ptr = ntohs(th->urg_ptr);
3362 
3363         if (ptr)
3364                 ptr--;
3365         ptr += th->seq;
3366 
3367         /* ignore urgent data that we've already seen and read */
3368         if (after(sk->copied_seq+1, ptr))
3369                 return;
3370 
3371         /* do we already have a newer (or duplicate) urgent pointer? */
3372         if (sk->urg_data && !after(ptr, sk->urg_seq))
3373                 return;
3374 
3375         /* tell the world about our new urgent pointer */
3376         if (sk->proc != 0) {
3377                 if (sk->proc > 0) {
3378                         kill_proc(sk->proc, SIGURG, 1);
3379                 } else {
3380                         kill_pg(-sk->proc, SIGURG, 1);
3381                 }
3382         }
3383         sk->urg_data = URG_NOTYET;
3384         sk->urg_seq = ptr;
3385 }
3386 
3387 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3388         unsigned long saddr, unsigned long len)
3389 {
3390         unsigned long ptr;
3391 
3392         /* check if we get a new urgent pointer */
3393         if (th->urg)
3394                 tcp_check_urg(sk,th);
3395 
3396         /* do we wait for any urgent data? */
3397         if (sk->urg_data != URG_NOTYET)
3398                 return 0;
3399 
3400         /* is the urgent pointer pointing into this packet? */
3401         ptr = sk->urg_seq - th->seq + th->doff*4;
3402         if (ptr >= len)
3403                 return 0;
3404 
3405         /* ok, got the correct packet, update info */
3406         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3407         if (!sk->dead)
3408                 sk->data_ready(sk,0);
3409         return 0;
3410 }
3411 
3412 
3413 /*
3414  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3415  *
3416  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3417  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3418  *  TIME-WAIT)
3419  *
3420  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3421  *  close and we go into CLOSING (and later onto TIME-WAIT)
3422  *
3423  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3424  *
3425  */
3426 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3427          unsigned long saddr, struct device *dev)
3428 {
3429         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3430 
3431         if (!sk->dead) 
3432         {
3433                 sk->state_change(sk);
3434         }
3435 
3436         switch(sk->state) 
3437         {
3438                 case TCP_SYN_RECV:
3439                 case TCP_SYN_SENT:
3440                 case TCP_ESTABLISHED:
3441                         /*
3442                          * move to CLOSE_WAIT, tcp_data() already handled
3443                          * sending the ack.
3444                          */
3445                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3446                         tcp_set_state(sk,TCP_CLOSE_WAIT);
3447                         if (th->rst)
3448                                 sk->shutdown = SHUTDOWN_MASK;
3449                         break;
3450 
3451                 case TCP_CLOSE_WAIT:
3452                 case TCP_CLOSING:
3453                         /*
3454                          * received a retransmission of the FIN, do
3455                          * nothing.
3456                          */
3457                         break;
3458                 case TCP_TIME_WAIT:
3459                         /*
3460                          * received a retransmission of the FIN,
3461                          * restart the TIME_WAIT timer.
3462                          */
3463                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3464                         return(0);
3465                 case TCP_FIN_WAIT1:
3466                         /*
3467                          * This case occurs when a simultaneous close
3468                          * happens, we must ack the received FIN and
3469                          * enter the CLOSING state.
3470                          *
3471                          * XXX timeout not set properly
3472                          */
3473 
3474                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3475                         tcp_set_state(sk,TCP_CLOSING);
3476                         break;
3477                 case TCP_FIN_WAIT2:
3478                         /*
3479                          * received a FIN -- send ACK and enter TIME_WAIT
3480                          */
3481                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3482                         sk->shutdown|=SHUTDOWN_MASK;
3483                         tcp_set_state(sk,TCP_TIME_WAIT);
3484                         break;
3485                 case TCP_CLOSE:
3486                         /*
3487                          * already in CLOSE
3488                          */
3489                         break;
3490                 default:
3491                         tcp_set_state(sk,TCP_LAST_ACK);
3492         
3493                         /* Start the timers. */
3494                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3495                         return(0);
3496         }
3497 /*      sk->ack_backlog++;     tcp_data has already dealt with ACK's */
3498 
3499         return(0);
3500 }
3501 
3502 
3503 /* This will accept the next outstanding connection. */
/*
 *      tcp_accept - hand back the socket of the next established
 *      connection queued on a listening socket.
 *
 *      sk:     socket to accept on; must be in TCP_LISTEN state.
 *      flags:  when O_NONBLOCK is set the call does not sleep if
 *              nothing is queued.
 *
 *      Returns the struct sock carried by the dequeued skb (skb->sk),
 *      or NULL with sk->err set to EINVAL (not listening), EAGAIN
 *      (non-blocking and nothing queued) or ERESTARTSYS (an unblocked
 *      signal is pending after the sleep).
 */
3504 static struct sock *
3505 tcp_accept(struct sock *sk, int flags)
3506 {
3507         struct sock *newsk;
3508         struct sk_buff *skb;
3509   
3510   /*
3511    * We need to make sure that this socket is listening,
3512    * and that it has something pending.
3513    */
3514 
3515         if (sk->state != TCP_LISTEN) 
3516         {
3517                 sk->err = EINVAL;
3518                 return(NULL); 
3519         }
3520 
3521         /* Avoid the race. */
3522         cli();
3523         sk->inuse = 1;
3524 
3525         while((skb = tcp_dequeue_established(sk)) == NULL) 
3526         {
3527                 if (flags & O_NONBLOCK) 
3528                 {
3529                         sti();
3530                         release_sock(sk);
3531                         sk->err = EAGAIN;
3532                         return(NULL);
3533                 }
3534 
3535                 release_sock(sk);      /* give the socket up before sleeping */
3536                 interruptible_sleep_on(sk->sleep);
3537                 if (current->signal & ~current->blocked) 
3538                 {
                         /*
                          *      release_sock() already ran before the sleep and
                          *      inuse has not been set again, so it is not
                          *      repeated on this exit.
                          */
3539                         sti();
3540                         sk->err = ERESTARTSYS;
3541                         return(NULL);
3542                 }
3543                 sk->inuse = 1; /* reclaim the socket before polling the queue again */
3544         }
3545         sti();
3546 
3547         /*
3548          *      Now all we need to do is return skb->sk. 
3549          */
3550 
3551         newsk = skb->sk;
3552 
3553         kfree_skb(skb, FREE_READ);
3554         sk->ack_backlog--;     /* presumably the count of connections awaiting accept -- confirm */
3555         release_sock(sk);
3556         return(newsk);
3557 }
3558 
3559 
3560 /*
3561  *      This will initiate an outgoing connection. 
3562  */
3563  
/*
 *      tcp_connect - start an active open by building and sending a SYN.
 *
 *      sk:        socket to connect; must be in TCP_CLOSE state.
 *      usin:      destination address and port.  An address of
 *                 INADDR_ANY is overwritten in place with ip_my_addr().
 *      addr_len:  length of *usin as supplied by the caller; must be
 *                 at least 8.
 *
 *      Returns 0 once the SYN has been handed to queue_xmit (the socket
 *      is then in TCP_SYN_SENT), or a negative errno: -EISCONN, -EINVAL,
 *      -EAFNOSUPPORT, -ENETUNREACH or -ENOMEM.
 */
3564 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
3565 {
3566         struct sk_buff *buff;
3567         struct device *dev=NULL;       /* passed by address to build_header; presumably filled in there -- confirm */
3568         unsigned char *ptr;
3569         int tmp;
3570         int atype;
3571         struct tcphdr *t1;
3572         struct rtable *rt;     /* only consulted for RTF_WINDOW / RTF_MTU below */
3573 
3574         if (sk->state != TCP_CLOSE) 
3575                 return(-EISCONN);
3576 
3577         if (addr_len < 8) 
3578                 return(-EINVAL);
3579 
3580         if (usin->sin_family && usin->sin_family != AF_INET) 
3581                 return(-EAFNOSUPPORT);
3582 
3583         /*
3584          *      connect() to INADDR_ANY means loopback (BSD'ism).
3585          */
3586         
3587         if(usin->sin_addr.s_addr==INADDR_ANY)
3588                 usin->sin_addr.s_addr=ip_my_addr();
3589                   
3590         /*
3591          *      Don't want a TCP connection going to a broadcast address 
3592          */
3593 
3594         if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
3595                 return -ENETUNREACH;
3596   
         /*
          *      Seed the sequence state from the clock while holding the
          *      socket, then let go of it around the allocation below.
          */
3597         sk->inuse = 1;
3598         sk->daddr = usin->sin_addr.s_addr;
3599         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3600         sk->window_seq = sk->write_seq;
3601         sk->rcv_ack_seq = sk->write_seq -1;
3602         sk->err = 0;
3603         sk->dummy_th.dest = usin->sin_port;
3604         release_sock(sk);
3605 
3606         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3607         if (buff == NULL) 
3608         {
3609                 return(-ENOMEM);
3610         }
3611         sk->inuse = 1;
3612         buff->len = 24;        /* matches doff = 6 words below; assumes a 20-byte tcphdr plus the 4-byte option -- confirm */
3613         buff->sk = sk;
3614         buff->free = 1;
3615         buff->localroute = sk->localroute;
3616         
3617         t1 = (struct tcphdr *) buff->data;
3618 
3619         /*
3620          *      Put in the IP header and routing stuff. 
3621          */
3622          
3623         rt=ip_rt_route(sk->daddr, NULL, NULL);
3624         
3625 
3626         /*
3627          *      We need to build the routing stuff from the things saved in skb. 
3628          */
3629 
3630         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3631                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3632         if (tmp < 0) 
3633         {
3634                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3635                 release_sock(sk);
3636                 return(-ENETUNREACH);
3637         }
3638 
         /* tmp is the number of header bytes build_header placed before the TCP header. */
3639         buff->len += tmp;
3640         t1 = (struct tcphdr *)((char *)t1 +tmp);
3641 
3642         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3643         t1->seq = ntohl(sk->write_seq++);      /* write_seq is advanced by one for the SYN */
3644         sk->sent_seq = sk->write_seq;
3645         buff->h.seq = sk->write_seq;
3646         t1->ack = 0;
3647         t1->window = 2;        /* NOTE(review): stored with no byte-order conversion -- confirm intended */
3648         t1->res1=0;
3649         t1->res2=0;
3650         t1->rst = 0;
3651         t1->urg = 0;
3652         t1->psh = 0;
3653         t1->syn = 1;
3654         t1->urg_ptr = 0;
3655         t1->doff = 6;
3656         /* use 512 or whatever user asked for */
3657         
3658         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3659                 sk->window_clamp=rt->rt_window;
3660         else
3661                 sk->window_clamp=0;
3662 
         /*
          *      Pick sk->mtu: an explicit user_mss wins, then a per-route
          *      value, then a default that depends on whether the source and
          *      destination differ under the netmask.
          */
3663         if (sk->user_mss)
3664                 sk->mtu = sk->user_mss;
3665         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3666                 sk->mtu = rt->rt_mss;
3667         else 
3668         {
3669 #ifdef CONFIG_INET_SNARL
3670                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3671 #else
3672                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3673 #endif
3674                         sk->mtu = 576 - HEADER_SIZE;
3675                 else
3676                         sk->mtu = MAX_WINDOW;
3677         }
3678         /*
3679          *      but not bigger than device MTU 
3680          */
3681 
3682         if(sk->mtu <32)
3683                 sk->mtu = 32;   /* Sanity limit */
3684                 
3685         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3686         
3687         /*
3688          *      Put in the TCP options to say MTU. 
3689          */
3690 
         /* Option kind 2, length 4, then sk->mtu as two bytes, high byte first. */
3691         ptr = (unsigned char *)(t1+1);
3692         ptr[0] = 2;
3693         ptr[1] = 4;
3694         ptr[2] = (sk->mtu) >> 8;
3695         ptr[3] = (sk->mtu) & 0xff;
3696         tcp_send_check(t1, sk->saddr, sk->daddr,
3697                   sizeof(struct tcphdr) + 4, sk);
3698 
3699         /*
3700          *      This must go first otherwise a really quick response will get reset. 
3701          */
3702 
3703         tcp_set_state(sk,TCP_SYN_SENT);
3704         sk->rto = TCP_TIMEOUT_INIT;
3705         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3706         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES; /* presumably pre-charges the counter so only TCP_SYN_RETRIES tries remain -- confirm */
3707 
3708         sk->prot->queue_xmit(sk, dev, buff, 0);  
3709         tcp_statistics.TcpActiveOpens++;
3710         tcp_statistics.TcpOutSegs++;
3711   
3712         release_sock(sk);
3713         return(0);
3714 }
3715 
3716 
3717 /* This function checks to see if the tcp header is actually acceptable. */
3718 static int
3719 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3720              struct options *opt, unsigned long saddr, struct device *dev)
3721 {
3722         unsigned long next_seq;
3723 
3724         next_seq = len - 4*th->doff;
3725         if (th->fin)
3726                 next_seq++;
3727         /* if we have a zero window, we can't have any data in the packet.. */
3728         if (next_seq && !sk->window)
3729                 goto ignore_it;
3730         next_seq += th->seq;
3731 
3732         /*
3733          * This isn't quite right.  sk->acked_seq could be more recent
3734          * than sk->window.  This is however close enough.  We will accept
3735          * slightly more packets than we should, but it should not cause
3736          * problems unless someone is trying to forge packets.
3737          */
3738 
3739         /* have we already seen all of this packet? */
3740         if (!after(next_seq+1, sk->acked_seq))
3741                 goto ignore_it;
3742         /* or does it start beyond the window? */
3743         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3744                 goto ignore_it;
3745 
3746         /* ok, at least part of this packet would seem interesting.. */
3747         return 1;
3748 
3749 ignore_it:
3750         if (th->rst)
3751                 return 0;
3752 
3753         /*
3754          *      Send a reset if we get something not ours and we are
3755          *      unsynchronized. Note: We don't do anything to our end. We
3756          *      are just killing the bogus remote connection then we will
3757          *      connect again and it will work (with luck).
3758          */
3759          
3760         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3761                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3762                 return 1;
3763         }
3764 
3765         /* Try to resync things. */
3766         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3767         return 0;
3768 }
3769 
3770 
3771 #ifdef TCP_FASTPATH
3772 /*
3773  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3774  *      Yes if
3775  *      a) The queue is empty
3776  *      b) The last frame on the queue has the acked flag set
3777  */
3778 
3779 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3780 {
3781         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3782         if(skb==NULL || sk->receive_queue.prev->acked)
3783                 return 1;
3784 }
3785 
3786 #endif
3787 
3788 int
3789 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3790         unsigned long daddr, unsigned short len,
3791         unsigned long saddr, int redo, struct inet_protocol * protocol)
3792 {
             /*
              *      Receive one TCP segment and run it through the per-socket
              *      state machine.
              *
              *      skb      - buffer holding the segment; the TCP header is
              *                 taken from skb->h.th.
              *      dev      - receiving device; a NULL dev makes the routine
              *                 return at once.
              *      opt      - options, only passed on to tcp_reset(),
              *                 tcp_sequence() and tcp_conn_request().
              *      daddr    - destination address of the segment.
              *      len      - length handed to tcp_check(), tcp_ack(),
              *                 tcp_data() etc.; also stored in skb->len.
              *      saddr    - source address of the segment.
              *      redo     - when non-zero the checksum test, the ntohl() of
              *                 th->seq, the skb field setup and the backlog/
              *                 inuse handling are all skipped.
              *                 NOTE(review): presumably set when a segment is
              *                 replayed from sk->back_log - confirm with caller.
              *      protocol - not referenced in this function.
              *
              *      Every return path visible here returns 0.
              */
3793         struct tcphdr *th;
3794         struct sock *sk;
3795 
3796         if (!skb) 
3797         {
3798                 return(0);
3799         }
3800 
3801         if (!dev) 
3802         {
3803                 return(0);
3804         }
3805   
3806         tcp_statistics.TcpInSegs++;
3807   
3808         if(skb->pkt_type!=PACKET_HOST)
3809         {
3810                 kfree_skb(skb,FREE_READ);
3811                 return(0);
3812         }
3813   
3814         th = skb->h.th;
3815 
3816         /*
3817          *      Find the socket.
3818          */
3819 
3820         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3821 
3822         /*
3823          *      If this socket has got a reset it is to all intents and purposes 
3824          *      really dead 
3825          */
3826          
3827         if (sk!=NULL && sk->zapped)
3828                 sk=NULL;
3829 
3830         if (!redo) 
3831         {
3832                 if (tcp_check(th, len, saddr, daddr )) 
3833                 {
3834                         skb->sk = NULL;
3835                         kfree_skb(skb,FREE_READ);
3836                         /*
3837                          * We don't release the socket because it was
3838                          * never marked in use.
3839                          */
3840                         return(0);
3841                 }
                     /* The sequence number is converted in place, once only. */
3842                 th->seq = ntohl(th->seq);
3843 
3844                 /* See if we know about the socket. */
3845                 if (sk == NULL) 
3846                 {
3847                         if (!th->rst)
3848                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3849                         skb->sk = NULL;
3850                         kfree_skb(skb, FREE_READ);
3851                         return(0);
3852                 }
3853 
3854                 skb->len = len;
3855                 skb->acked = 0;
3856                 skb->used = 0;
3857                 skb->free = 0;
3858                 skb->saddr = daddr;
3859                 skb->daddr = saddr;
3860         
3861                 /* We may need to add it to the backlog here. */
                     /*
                      * The inuse test and set are done with interrupts
                      * disabled (cli/sti) so they cannot be interleaved.
                      */
3862                 cli();
3863                 if (sk->inuse) 
3864                 {
3865                         skb_queue_tail(&sk->back_log, skb);
3866                         sti();
3867                         return(0);
3868                 }
3869                 sk->inuse = 1;
3870                 sti();
3871         }
3872         else
3873         {
                     /*
                      * NOTE(review): this return does not free skb - confirm
                      * that the redo caller keeps ownership in this case.
                      */
3874                 if (!sk) 
3875                 {
3876                         return(0);
3877                 }
3878         }
3879 
3880 
             /*
              * NOTE(review): this return neither frees skb nor calls
              * release_sock(), although sk->inuse was set to 1 above on the
              * !redo path - confirm whether that is intended.
              */
3881         if (!sk->prot) 
3882         {
3883                 return(0);
3884         }
3885 
3886 
3887         /*
3888          *      Charge the memory to the socket. 
3889          */
3890          
3891         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3892         {
3893                 kfree_skb(skb, FREE_READ);
3894                 release_sock(sk);
3895                 return(0);
3896         }
3897 
3898         skb->sk=sk;
3899         sk->rmem_alloc += skb->mem_len;
3900 
3901 #ifdef TCP_FASTPATH
3902 /*
3903  *      Incoming data stream fastpath. 
3904  *
3905  *      We try to optimise two things.
3906  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3907  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3908  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3909  *
3910  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3911  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3912  *      speed although further optimizing here is possible.
3913  */
3914  
3915         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3916         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3917         {       
3918                 /* Packets in order. Fits window */
3919                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3920                 {
3921                         /* Ack is harder */
3922                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3923                         {
3924                                 kfree_skb(skb, FREE_READ);
3925                                 release_sock(sk);
3926                                 return 0;
3927                         }
3928                         /*
3929                          *      Set up variables
3930                          */
3931                         skb->len -= (th->doff *4);
3932                         sk->bytes_rcv += skb->len;
3933                         tcp_rx_hit2++;
3934                         if(skb->len)
3935                         {
3936                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3937                                 if(sk->window >= skb->len)
3938                                         sk->window-=skb->len;                   /* We know its effect on the window */
3939                                 else
3940                                         sk->window=0;
3941                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3942                                 skb->acked=1;                           /* Guaranteed true */
3943                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3944                                         sk->bytes_rcv > sk->max_unacked)
3945                                 {
3946                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3947                                 }
3948                                 else
3949                                 {
3950                                         sk->ack_backlog++;
3951                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3952                                 }
3953                                 if(!sk->dead)
3954                                         sk->data_ready(sk,0);
3955                                 release_sock(sk);
3956                                 return 0;
3957                         }
                             /*
                              * A segment with no payload drops out of the
                              * in-order case here and continues with the
                              * generic path below, so tcp_ack() runs a second
                              * time for it when th->ack is set.
                              */
3958                 }
3959                 /*
3960                  *      More generic case of arriving data stream in ESTABLISHED
3961                  */
3962                 tcp_rx_hit1++;
3963                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3964                 {
3965                         kfree_skb(skb, FREE_READ);
3966                         release_sock(sk);
3967                         return 0;
3968                 }
3969                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3970                 {
3971                         kfree_skb(skb, FREE_READ);
3972                         release_sock(sk);
3973                         return 0;
3974                 }
3975                 if(tcp_data(skb, sk, saddr, len))
3976                         kfree_skb(skb, FREE_READ);
3977                 release_sock(sk);
3978                 return 0;
3979         }
3980         tcp_rx_miss++;
3981 #endif  
3982 
3983         /*
3984          *      Now deal with all cases.
3985          */
3986          
3987         switch(sk->state) 
3988         {
3989         
3990                 /*
3991                  * This should close the system down if it's waiting
3992                  * for an ack that is never going to be sent.
3993                  */
3994                 case TCP_LAST_ACK:
3995                         if (th->rst) 
3996                         {
3997                                 sk->zapped=1;
3998                                 sk->err = ECONNRESET;
3999                                 tcp_set_state(sk,TCP_CLOSE);
4000                                 sk->shutdown = SHUTDOWN_MASK;
4001                                 if (!sk->dead) 
4002                                 {
4003                                         sk->state_change(sk);
4004                                 }
4005                                 kfree_skb(skb, FREE_READ);
4006                                 release_sock(sk);
4007                                 return(0);
4008                         }
4009 
                             /*
                              * No reset: LAST_ACK falls through and shares the
                              * handling of the connected states below.
                              */
4010                 case TCP_ESTABLISHED:
4011                 case TCP_CLOSE_WAIT:
4012                 case TCP_CLOSING:
4013                 case TCP_FIN_WAIT1:
4014                 case TCP_FIN_WAIT2:
4015                 case TCP_TIME_WAIT:
4016 
4017                         /*
4018                          * is it a good packet?
4019                          */
4020 
4021                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4022                         {
4023                                 kfree_skb(skb, FREE_READ);
4024                                 release_sock(sk);
4025                                 return(0);
4026                         }
4027 
4028                         if (th->rst) 
4029                         {
4030                                 tcp_statistics.TcpEstabResets++;
4031                                 sk->zapped=1;
4032                                 /* This means the thing should really be closed. */
4033                                 sk->err = ECONNRESET;
4034                                 if (sk->state == TCP_CLOSE_WAIT) 
4035                                 {
4036                                         sk->err = EPIPE;
4037                                 }
4038                                 tcp_set_state(sk,TCP_CLOSE);
4039                                 sk->shutdown = SHUTDOWN_MASK;
4040                                 if (!sk->dead) 
4041                                 {
4042                                         sk->state_change(sk);
4043                                 }
4044                                 kfree_skb(skb, FREE_READ);
4045                                 release_sock(sk);
4046                                 return(0);
4047                         }
4048                         if (th->syn) 
4049                         {
                                     /*
                                      * The write sequence and the state are
                                      * saved first because the socket is moved
                                      * to TCP_CLOSE before they are used.
                                      */
4050                                 long seq=sk->write_seq;
4051                                 int st=sk->state;
4052                                 tcp_statistics.TcpEstabResets++;
4053                                 sk->err = ECONNRESET;
4054                                 tcp_set_state(sk,TCP_CLOSE);
4055                                 sk->shutdown = SHUTDOWN_MASK;
4056                                 if(sk->debug)
4057                                         printk("Socket %p reset by SYN while established.\n", sk);
4058                                 if (!sk->dead) {
4059                                         sk->state_change(sk);
4060                                 }
4061                                 /*
4062                                  *      The BSD port reuse protocol violation.
4063                                  *      I do sometimes wonder how the *bsd people
4064                                  *      have the nerve to talk about 'standards'.
4065                                  *
4066                                  *      If seq > last used on connection then
4067                                  *      open a new connection and use 128000+seq of
4068                                  *      old connection.
4069                                  *
4070                                  */
4071                                  
4072                                 if(st==TCP_TIME_WAIT && th->seq > sk->acked_seq && sk->dead)
4073                                 {
4074                                         struct sock *psk=sk;
4075                                         /*
4076                                          *      Find the listening socket.
4077                                          */
4078                                         sk=get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
4079                                         if(sk && sk->state==TCP_LISTEN)
4080                                         {
                                                     /*
                                                      * From here on sk is the
                                                      * listening socket; the old
                                                      * socket is released via psk.
                                                      * NOTE(review): inuse is set
                                                      * without the cli()/backlog
                                                      * test used at entry - confirm.
                                                      */
4081                                                 sk->inuse=1;
4082                                                 tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
4083                                                 release_sock(psk);
4084                                                 /* Fall through in case people are
4085                                                    also using the piggy backed SYN + data 
4086                                                    protocol violation */
4087                                         }
4088                                         else
4089                                         {
4090                                                 tcp_reset(daddr, saddr,  th, psk->prot, opt,dev, psk->ip_tos,psk->ip_ttl);
4091                                                 release_sock(psk);
4092                                                 kfree_skb(skb, FREE_READ);
4093                                                 return 0;
4094                                         }                       
4095                                 }
4096                                 else
4097                                 {
4098                                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4099                                         kfree_skb(skb, FREE_READ);
4100                                         release_sock(sk);
4101                                         return(0);
4102                                 }
4103                         }       
4104                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4105                                 kfree_skb(skb, FREE_READ);
4106                                 release_sock(sk);
4107                                 return(0);
4108                         }
4109         
4110                         if (tcp_urg(sk, th, saddr, len)) {
4111                                 kfree_skb(skb, FREE_READ);
4112                                 release_sock(sk);
4113                                 return(0);
4114                         }
4115 
4116         
4117                         if (tcp_data(skb, sk, saddr, len)) {
4118                                 kfree_skb(skb, FREE_READ);
4119                                 release_sock(sk);
4120                                 return(0);
4121                         }       
4122 
4123                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4124                                 kfree_skb(skb, FREE_READ);
4125                                 release_sock(sk);
4126                                 return(0);
4127                         }
4128         
4129                         release_sock(sk);
4130                         return(0);
4131 
4132 
4133                 case TCP_CLOSE:
4134                         if (sk->dead || sk->daddr) {
4135                                 kfree_skb(skb, FREE_READ);
4136                                         release_sock(sk);
4137                                 return(0);
4138                         }
4139         
4140                         if (!th->rst) {
4141                                 if (!th->ack)
4142                                         th->ack_seq = 0;
4143                                 if(sk->debug) printk("Reset on closed socket %d.\n",sk->blog);
4144                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4145                         }
4146                         kfree_skb(skb, FREE_READ);
4147                         release_sock(sk);
4148                                 return(0);
4149         
4150                 case TCP_LISTEN:
                             /*
                              * A listening socket drops resets silently, answers
                              * an ACK with a reset, and hands a SYN to
                              * tcp_conn_request(); anything else is discarded.
                              */
4151                         if (th->rst) {
4152                                 kfree_skb(skb, FREE_READ);
4153                                 release_sock(sk);
4154                                 return(0);
4155                         }
4156                         if (th->ack) {
4157                                 if(sk->debug) printk("Reset on listening socket %d.\n",sk->blog);
4158                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4159                                 kfree_skb(skb, FREE_READ);
4160                                 release_sock(sk);
4161                                 return(0);
4162                         }
4163         
4164                         if (th->syn) 
4165                         {
4166                                 /*
4167                                  * Now we just put the whole thing including
4168                                  * the header and saddr, and protocol pointer
4169                                  * into the buffer.  We can't respond until the
4170                                  * user tells us to accept the connection.
4171                                  */
4172                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
4173                                 release_sock(sk);
4174                                 return(0);
4175                         }
4176 
4177                         kfree_skb(skb, FREE_READ);
4178                         release_sock(sk);
4179                         return(0);
4180 
4181                 case TCP_SYN_RECV:
4182                         if (th->syn) {
4183                                 /* Probably a retransmitted syn */
4184                                 kfree_skb(skb, FREE_READ);
4185                                 release_sock(sk);
4186                                 return(0);
4187                         }
4188         
4189         
                     /*
                      * SYN_RECV without a SYN falls through to default, which
                      * adds the sequence check and then falls through again
                      * into the SYN_SENT handling.  SYN_SENT itself enters
                      * below without the sequence check.
                      */
4190                 default:
4191                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4192                         {
4193                                 kfree_skb(skb, FREE_READ);
4194                                 release_sock(sk);
4195                                 return(0);
4196                         }
4197         
4198                 case TCP_SYN_SENT:
4199                         if (th->rst) 
4200                         {
4201                                 tcp_statistics.TcpAttemptFails++;
4202                                 sk->err = ECONNREFUSED;
4203                                 tcp_set_state(sk,TCP_CLOSE);
4204                                 sk->shutdown = SHUTDOWN_MASK;
4205                                 sk->zapped = 1;
4206                                 if (!sk->dead) 
4207                                 {
4208                                         sk->state_change(sk);
4209                                 }
4210                                 kfree_skb(skb, FREE_READ);
4211                                 release_sock(sk);
4212                                 return(0);
4213                         }
4214                         if (!th->ack) 
4215                         {
4216                                 if (th->syn) 
4217                                 {
4218                                         /* Crossed SYN's are fine - but talking to
4219                                            yourself is right out... */
4220                                         if(sk->saddr==saddr && sk->daddr==daddr &&
4221                                                 sk->dummy_th.source==th->source &&
4222                                                 sk->dummy_th.dest==th->dest)
4223                                         {
4224                                                 tcp_statistics.TcpAttemptFails++;
4225                                                 sk->err = ECONNREFUSED;
4226                                                 tcp_set_state(sk,TCP_CLOSE);
4227                                                 sk->shutdown = SHUTDOWN_MASK;
4228                                                 sk->zapped = 1;
4229                                                 if (!sk->dead) 
4230                                                 {
4231                                                         sk->state_change(sk);
4232                                                 }
4233                                                 kfree_skb(skb, FREE_READ);
4234                                                 release_sock(sk);
4235                                                 return(0);
4236                                         }
4237                                         tcp_set_state(sk,TCP_SYN_RECV);
4238                                 }
4239                                 kfree_skb(skb, FREE_READ);
4240                                 release_sock(sk);
4241                                 return(0);
4242                         }
4243         
                             /*
                              * The segment carries an ACK.  The nested switch only
                              * handles SYN_SENT and SYN_RECV; any other state that
                              * reached here through default skips it and continues
                              * with the urg/data/fin processing after it.
                              */
4244                         switch(sk->state) 
4245                         {
4246                                 case TCP_SYN_SENT:
4247                                         if (!tcp_ack(sk, th, saddr, len)) 
4248                                         {
4249                                                 tcp_statistics.TcpAttemptFails++;
4250                                                 tcp_reset(daddr, saddr, th,
4251                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4252                                                 kfree_skb(skb, FREE_READ);
4253                                                         release_sock(sk);
4254                                                 return(0);
4255                                         }
4256         
4257                                         /*
4258                                          * If the syn bit is also set, switch to
4259                                          * tcp_syn_recv, and then to established.
4260                                          */
4261                                         if (!th->syn) 
4262                                         {
4263                                                 kfree_skb(skb, FREE_READ);
4264                                                 release_sock(sk);
4265                                                 return(0);
4266                                         }
4267         
4268                                         /* Ack the syn and fall through. */
4269                                         sk->acked_seq = th->seq+1;
4270                                         sk->fin_seq = th->seq;
4271                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4272                                                 sk, th, sk->daddr);
4273                 
4274                                 case TCP_SYN_RECV:
4275                                         if (!tcp_ack(sk, th, saddr, len)) 
4276                                         {
4277                                                 tcp_statistics.TcpAttemptFails++;
4278                                                 tcp_reset(daddr, saddr, th,
4279                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4280                                                 kfree_skb(skb, FREE_READ);
4281                                                 release_sock(sk);
4282                                                 return(0);
4283                                         }
4284         
4285                                         tcp_set_state(sk,TCP_ESTABLISHED);
4286         
4287                                         /*
4288                                          *      Now we need to finish filling out
4289                                          *      some of the tcp header.
4290                                          * 
4291                                          *      We need to check for mtu info. 
4292                                          */
4293                                         tcp_options(sk, th);
4294                                         sk->dummy_th.dest = th->source;
4295                                         sk->copied_seq = sk->acked_seq-1;
4296                                         if (!sk->dead) 
4297                                         {
4298                                                 sk->state_change(sk);
4299                                         }
4300         
4301                                         /*
4302                                          * We've already processed his first
4303                                          * ack.  In just about all cases that
4304                                          * will have set max_window.  This is
4305                                          * to protect us against the possibility
4306                                          * that the initial window he sent was 0.
4307                                          * This must occur after tcp_options, which
4308                                          * sets sk->mtu.
4309                                          */
4310                                         if (sk->max_window == 0) 
4311                                         {
4312                                                 sk->max_window = 32;
4313                                                 sk->mss = min(sk->max_window, sk->mtu);
4314                                         }
4315 
4316                                         /*
4317                                          * Now process the rest like we were
4318                                          * already in the established state.
4319                                          */
4320                                         if (th->urg) 
4321                                         {
4322                                                 if (tcp_urg(sk, th, saddr, len)) 
4323                                                 { 
4324                                                         kfree_skb(skb, FREE_READ);
4325                                                         release_sock(sk);
4326                                                         return(0);
4327                                                 }
4328                                         }
                                             /*
                                              * NOTE(review): when tcp_data()
                                              * returns non-zero the skb is freed
                                              * here, yet th (taken from
                                              * skb->h.th) is still tested and
                                              * skb is still passed to tcp_fin()
                                              * below.  The outer path after this
                                              * switch returns instead - confirm
                                              * whether this is a use after free.
                                              */
4329                                         if (tcp_data(skb, sk, saddr, len))
4330                                                 kfree_skb(skb, FREE_READ);
4331 
4332                                         if (th->fin)
4333                                                 tcp_fin(skb, sk, th, saddr, dev);
4334                                         release_sock(sk);
4335                                         return(0);
4336                         }
4337         
4338                         if (th->urg) 
4339                         {
4340                                 if (tcp_urg(sk, th, saddr, len)) 
4341                                 {
4342                                         kfree_skb(skb, FREE_READ);
4343                                         release_sock(sk);
4344                                         return(0);
4345                                 }
4346                         }
4347                         if (tcp_data(skb, sk, saddr, len)) 
4348                         {
4349                                 kfree_skb(skb, FREE_READ);
4350                                 release_sock(sk);
4351                                 return(0);
4352                         }
4353         
4354                         if (!th->fin) 
4355                         {
4356                                 release_sock(sk);
4357                                 return(0);
4358                         }
4359                         tcp_fin(skb, sk, th, saddr, dev);
4360                         release_sock(sk);
4361                         return(0);
4362         }
4363 }
4364 
4365 
4366 /*
4367  * This routine sends a packet with an out of date sequence
4368  * number. It assumes the other end will try to ack it.
4369  */
4370 
4371 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4372 {
             /*
              *      Build and transmit a data-less TCP segment whose sequence
              *      number is one below sk->sent_seq and which has only the
              *      ACK flag set.
              *
              *      Nothing is sent, and nothing is reported to the caller,
              *      when the socket is zapped, when its state is not one of
              *      ESTABLISHED, CLOSE_WAIT, FIN_WAIT1, LAST_ACK or CLOSING,
              *      when the buffer allocation fails, or when build_header()
              *      returns a negative value.
              */
4373         struct sk_buff *buff;
4374         struct tcphdr *t1;
4375         struct device *dev=NULL;
4376         int tmp;
4377 
4378         if (sk->zapped)
4379                 return; /* After a valid reset we can send no more */
4380 
4381         /*
4382          * Write data can still be transmitted/retransmitted in the
4383          * following states.  If any other state is encountered, return.
4384          */
4385 
4386         if (sk->state != TCP_ESTABLISHED && 
4387             sk->state != TCP_CLOSE_WAIT &&
4388             sk->state != TCP_FIN_WAIT1 && 
4389             sk->state != TCP_LAST_ACK &&
4390             sk->state != TCP_CLOSING
4391         ) {
4392                 return;
4393         }
4394 
4395         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4396         if (buff == NULL) 
4397                 return;
4398 
4399         buff->len = sizeof(struct tcphdr);
4400         buff->free = 1;
4401         buff->sk = sk;
4402         buff->localroute = sk->localroute;
4403 
4404         t1 = (struct tcphdr *) buff->data;
4405 
4406         /* Put in the IP header and routing stuff. */
4407         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4408                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4409         if (tmp < 0) 
4410         {
                     /* Header could not be built: give the buffer back. */
4411                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4412                 return;
4413         }
4414 
             /*
              * tmp is the number of bytes build_header() placed in front
              * of the TCP header, so both the length and t1 move by it.
              */
4415         buff->len += tmp;
4416         t1 = (struct tcphdr *)((char *)t1 +tmp);
4417 
             /* Start from the socket's template header, then fix it up. */
4418         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4419 
4420         /*
4421          * Use a previous sequence.
4422          * This should cause the other end to send an ack.
4423          */
4424         t1->seq = htonl(sk->sent_seq-1);
4425         t1->ack = 1; 
4426         t1->res1= 0;
4427         t1->res2= 0;
4428         t1->rst = 0;
4429         t1->urg = 0;
4430         t1->psh = 0;
4431         t1->fin = 0;
4432         t1->syn = 0;
             /*
              * These are outgoing header fields, so they are converted
              * host-to-network like the sequence number above.
              */
4433         t1->ack_seq = htonl(sk->acked_seq);
4434         t1->window = htons(tcp_select_window(sk));
4435         t1->doff = sizeof(*t1)/4;
4436         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4437 
4438          /*     Send it and free it.
4439           *     This will prevent the timer from automatically being restarted.
4440           */
4441         sk->prot->queue_xmit(sk, dev, buff, 1);
4442         tcp_statistics.TcpOutSegs++;
4443 }
4444 
4445 void
4446 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4447 {
             /*
              *      Send one probe segment through tcp_write_wakeup() and back
              *      off: the retransmit timeout is doubled, capped at 120*HZ,
              *      and the TIME_PROBE0 timer is re-armed with the new value.
              *      Does nothing when the socket has been zapped.
              */
4448         if (sk->zapped)
4449                 return;         /* After a valid reset we can send no more */
4450 
             /*
              * tcp_write_wakeup() returns void, so the bookkeeping below is
              * done whether or not a segment actually went out.
              */
4451         tcp_write_wakeup(sk);
4452 
4453         sk->backoff++;
4454         sk->rto = min(sk->rto << 1, 120*HZ);
4455         reset_timer (sk, TIME_PROBE0, sk->rto);
             /* Count the probe per socket and per protocol. */
4456         sk->retransmits++;
4457         sk->prot->retransmits ++;
4458 }
4459 
4460 /*
4461  *      Socket option code for TCP. 
4462  */
4463   
4464 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4465 {
             /*
              *      Set a TCP level socket option.
              *
              *      Levels other than SOL_TCP are handed to ip_setsockopt().
              *      The option value is always read as one int from optval.
              *
              *      Returns 0 on success, -EINVAL for a NULL optval or a
              *      TCP_MAXSEG value outside 1..MAX_WINDOW, the verify_area()
              *      error if optval cannot be read, and -ENOPROTOOPT for an
              *      unknown option name.
              *
              *      NOTE(review): optlen is only forwarded to ip_setsockopt();
              *      it is not checked against sizeof(int) here.
              */
4466         int val,err;
4467 
4468         if(level!=SOL_TCP)
4469                 return ip_setsockopt(sk,level,optname,optval,optlen);
4470 
4471         if (optval == NULL) 
4472                 return(-EINVAL);
4473 
4474         err=verify_area(VERIFY_READ, optval, sizeof(int));
4475         if(err)
4476                 return err;
4477         
4478         val = get_fs_long((unsigned long *)optval);
4479 
4480         switch(optname)
4481         {
4482                 case TCP_MAXSEG:
4483 /*
4484  * values greater than interface MTU won't take effect.  however at
4485  * the point when this call is done we typically don't yet know
4486  * which interface is going to be used
4487  */
4488                         if(val<1||val>MAX_WINDOW)
4489                                 return -EINVAL;
4490                         sk->user_mss=val;
4491                         return 0;
4492                 case TCP_NODELAY:
                             /* Any non-zero value is stored as 1. */
4493                         sk->nonagle=(val==0)?0:1;
4494                         return 0;
4495                 default:
4496                         return(-ENOPROTOOPT);
4497         }
4498 }
4499 
4500 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4501 {
             /*
              *      Read a TCP level socket option.
              *
              *      Levels other than SOL_TCP are handed to ip_getsockopt().
              *      TCP_MAXSEG reports sk->user_mss and TCP_NODELAY reports
              *      sk->nonagle.  On success sizeof(int) is stored through
              *      optlen, the value through optval, and 0 is returned.
              *
              *      Returns -ENOPROTOOPT for an unknown option name, or the
              *      verify_area() error if optlen or optval is not writable.
              */
4502         int val,err;
4503 
4504         if(level!=SOL_TCP)
4505                 return ip_getsockopt(sk,level,optname,optval,optlen);
4506                         
4507         switch(optname)
4508         {
4509                 case TCP_MAXSEG:
4510                         val=sk->user_mss;
4511                         break;
4512                 case TCP_NODELAY:
4513                         val=sk->nonagle;
4514                         break;
4515                 default:
4516                         return(-ENOPROTOOPT);
4517         }
4518         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4519         if(err)
4520                 return err;
             /*
              * NOTE(review): the length is stored before optval has been
              * verified, so a failure below leaves *optlen already updated.
              */
4521         put_fs_long(sizeof(int),(unsigned long *) optlen);
4522 
4523         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4524         if(err)
4525                 return err;
4526         put_fs_long(val,(unsigned long *)optval);
4527 
4528         return(0);
4529 }       
4530 
4531 
4532 struct proto tcp_prot = {
             /*
              *      Operations table for TCP.  The initializer is positional,
              *      so the order of the entries has to match the member order
              *      of struct proto, which is declared elsewhere.
              *
              *      NOTE(review): the meaning of the NULL entry and of the
              *      trailing 128, 0 and {NULL,} values cannot be read from
              *      this file - presumably an unused hook, the maximum header
              *      size, the retransmit counter and the socket array; confirm
              *      against the struct proto declaration.
              */
4533         sock_wmalloc,
4534         sock_rmalloc,
4535         sock_wfree,
4536         sock_rfree,
4537         sock_rspace,
4538         sock_wspace,
4539         tcp_close,
4540         tcp_read,
4541         tcp_write,
4542         tcp_sendto,
4543         tcp_recvfrom,
4544         ip_build_header,
4545         tcp_connect,
4546         tcp_accept,
4547         ip_queue_xmit,
4548         tcp_retransmit,
4549         tcp_write_wakeup,
4550         tcp_read_wakeup,
4551         tcp_rcv,
4552         tcp_select,
4553         tcp_ioctl,
4554         NULL,
4555         tcp_shutdown,
4556         tcp_setsockopt,
4557         tcp_getsockopt,
4558         128,
4559         0,
4560         {NULL,},
4561         "TCP"
4562 };

/* [previous][next][first][last][top][bottom][index][help] */