root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_dequeue_established
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. tcp_send_skb
  15. tcp_dequeue_partial
  16. tcp_send_partial
  17. tcp_enqueue_partial
  18. tcp_send_ack
  19. tcp_build_header
  20. tcp_write
  21. tcp_sendto
  22. tcp_read_wakeup
  23. cleanup_rbuf
  24. tcp_read_urg
  25. tcp_read
  26. tcp_shutdown
  27. tcp_recvfrom
  28. tcp_reset
  29. tcp_options
  30. default_mask
  31. tcp_conn_request
  32. tcp_close
  33. tcp_write_xmit
  34. tcp_ack
  35. tcp_data
  36. tcp_check_urg
  37. tcp_urg
  38. tcp_fin
  39. tcp_accept
  40. tcp_connect
  41. tcp_sequence
  42. tcp_clean_end
  43. tcp_rcv
  44. tcp_write_wakeup
  45. tcp_send_probe0
  46. tcp_setsockopt
  47. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
  83  *              Matt Dillon     :       Yet more small nasties remove from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *              Alan Cox        :       Reset on closedown bug.
  87  *      Peter De Schrijver      :       ENOTCONN check missing in tcp_sendto().
  88  *              Michael Pall    :       Handle select() after URG properly in all cases.
  89  *              Michael Pall    :       Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin).
  90  *              Michael Pall    :       Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now.
  91  *              Michael Pall    :       recv(...,MSG_OOB) never blocks in the BSD api.
  92  *              Alan Cox        :       Changed the semantics of sk->socket to 
  93  *                                      fix a race and a signal problem with
  94  *                                      accept() and async I/O.
  95  *              Alan Cox        :       Relaxed the rules on tcp_sendto().
  96  *              Yury Shevchuk   :       Really fixed accept() blocking problem.
  97  *
  98  *
  99  * To Fix:
 100  *                      Fast path the code. Two things here - fix the window calculation
 101  *              so it doesn't iterate over the queue, also spot packets with no funny
 102  *              options arriving in order and process directly.
 103  *
 104  *              This program is free software; you can redistribute it and/or
 105  *              modify it under the terms of the GNU General Public License
 106  *              as published by the Free Software Foundation; either version
 107  *              2 of the License, or(at your option) any later version.
 108  *
 109  * Description of States:
 110  *
 111  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 112  *
 113  *      TCP_SYN_RECV            received a connection request, sent ack,
 114  *                              waiting for final ack in three-way handshake.
 115  *
 116  *      TCP_ESTABLISHED         connection established
 117  *
 118  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 119  *                              transmission of remaining buffered data
 120  *
 121  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 122  *                              to shutdown
 123  *
 124  *      TCP_CLOSING             both sides have shutdown but we still have
 125  *                              data we have to finish sending
 126  *
 127  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 128  *                              closed, can only be entered from FIN_WAIT2
 129  *                              or CLOSING.  Required because the other end
 130  *                              may not have gotten our last ACK causing it
 131  *                              to retransmit the data packet (which we ignore)
 132  *
 133  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 134  *                              us to finish writing our data and to shutdown
 135  *                              (we have to close() to move on to LAST_ACK)
 136  *
 137  *      TCP_LAST_ACK            our side has shutdown after remote has
 138  *                              shutdown.  There may still be data in our
 139  *                              buffer that we have to finish sending
 140  *              
 141  *      TCP_CLOSE               socket is finished
 142  */
 143 #include <linux/types.h>
 144 #include <linux/sched.h>
 145 #include <linux/mm.h>
 146 #include <linux/string.h>
 147 #include <linux/socket.h>
 148 #include <linux/sockios.h>
 149 #include <linux/termios.h>
 150 #include <linux/in.h>
 151 #include <linux/fcntl.h>
 152 #include <linux/inet.h>
 153 #include <linux/netdevice.h>
 154 #include "snmp.h"
 155 #include "ip.h"
 156 #include "protocol.h"
 157 #include "icmp.h"
 158 #include "tcp.h"
 159 #include <linux/skbuff.h>
 160 #include "sock.h"
 161 #include "route.h"
 162 #include <linux/errno.h>
 163 #include <linux/timer.h>
 164 #include <asm/system.h>
 165 #include <asm/segment.h>
 166 #include <linux/mm.h>
 167 
 168 #undef TCP_FASTPATH
/*
 * TCP_FASTPATH is explicitly undefined just above, so the tcp_rx_*
 * counters guarded by "#ifdef TCP_FASTPATH" further down are not
 * compiled in.
 */
 169 
 170 #define SEQ_TICK 3
/*
 * NOTE(review): neither SEQ_TICK nor seq_offset is referenced in the part
 * of the file visible here; presumably they feed initial sequence number
 * selection -- confirm against the rest of the file.
 */
 171 unsigned long seq_offset;
/*
 * TCP counters.  The code below updates (at least) the TcpCurrEstab,
 * TcpAttemptFails and TcpOutSegs fields.
 */
 172 struct tcp_mib  tcp_statistics;
 173 
 174 #ifdef TCP_FASTPATH
/* Fast-path receive counters, all starting at zero. */
 175 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 176 #endif
 177 
 178 
 179 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 180 {
 181         if (a < b) 
 182                 return(a);
 183         return(b);
 184 }
 185 
 186 #undef STATE_TRACE
 187 
 188 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 189 {
 190         if(sk->state==TCP_ESTABLISHED)
 191                 tcp_statistics.TcpCurrEstab--;
 192 #ifdef STATE_TRACE
 193         if(sk->debug)
 194                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 195 #endif  
 196         sk->state=state;
 197         if(state==TCP_ESTABLISHED)
 198                 tcp_statistics.TcpCurrEstab++;
 199 }
 200 
 201 /* This routine picks a TCP windows for a socket based on
 202    the following constraints
 203    
 204    1. The window can never be shrunk once it is offered (RFC 793)
 205    2. We limit memory per socket
 206    
 207    For now we use NET2E3's heuristic of offering half the memory
 208    we have handy. All is not as bad as this seems however because
 209    of two things. Firstly we will bin packets even within the window
 210    in order to get the data we are waiting for into the memory limit.
 211    Secondly we bin common duplicate forms at receive time
 212    
 213    Better heuristics welcome
 214 */
 215    
 216 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 217 {
 218         int new_window = sk->prot->rspace(sk);
 219         
 220         if(sk->window_clamp)
 221                 new_window=min(sk->window_clamp,new_window);
 222 /*
 223  * two things are going on here.  First, we don't ever offer a
 224  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 225  * receiver side of SWS as specified in RFC1122.
 226  * Second, we always give them at least the window they
 227  * had before, in order to avoid retracting window.  This
 228  * is technically allowed, but RFC1122 advises against it and
 229  * in practice it causes trouble.
 230  */
 231         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 232                 return(sk->window);
 233         return(new_window);
 234 }
 235 
 236 /*
 237  *      Find someone to 'accept'. Must be called with
 238  *      sk->inuse=1 or cli()
 239  */ 
 240 
 241 static struct sk_buff *tcp_find_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 242 {
 243         struct sk_buff *p=skb_peek(&s->receive_queue);
 244         if(p==NULL)
 245                 return NULL;
 246         do
 247         {
 248                 if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1)
 249                         return p;
 250                 p=p->next;
 251         }
 252         while(p!=skb_peek(&s->receive_queue));
 253         return NULL;
 254 }
 255 
 256 static struct sk_buff *tcp_dequeue_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 257 {
 258         struct sk_buff *skb;
 259         unsigned long flags;
 260         save_flags(flags);
 261         cli(); 
 262         skb=tcp_find_established(s);
 263         if(skb!=NULL)
 264                 skb_unlink(skb);        /* Take it off the queue */
 265         restore_flags(flags);
 266         return skb;
 267 }
 268 
 269 
 270 /*
 271  *      Enter the time wait state. 
 272  */
 273 
 274 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 275 {
 276         tcp_set_state(sk,TCP_TIME_WAIT);
 277         sk->shutdown = SHUTDOWN_MASK;
 278         if (!sk->dead)
 279                 sk->state_change(sk);
 280         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 281 }
 282 
 283 /*
 284  *      A timer event has trigger a tcp retransmit timeout. The
 285  *      socket xmit queue is ready and set up to send. Because
 286  *      the ack receive code keeps the queue straight we do
 287  *      nothing clever here.
 288  */
 289 
 290 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 291 {
 292         if (all) 
 293         {
 294                 ip_retransmit(sk, all);
 295                 return;
 296         }
 297 
 298         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 299         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 300         sk->cong_count = 0;
 301 
 302         sk->cong_window = 1;
 303 
 304         /* Do the actual retransmit. */
 305         ip_retransmit(sk, all);
 306 }
 307 
 308 
 309 /*
 310  * This routine is called by the ICMP module when it gets some
 311  * sort of error condition.  If err < 0 then the socket should
 312  * be closed and the error returned to the user.  If err > 0
 313  * it's just the icmp type << 8 | icmp code.  After adjustment
 314  * header points to the first 8 bytes of the tcp header.  We need
 315  * to find the appropriate port.
 316  */
 317 
 318 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 319         unsigned long saddr, struct inet_protocol *protocol)
 320 {
 321         struct tcphdr *th;
 322         struct sock *sk;
 323         struct iphdr *iph=(struct iphdr *)header;
 324   
 325         header+=4*iph->ihl;
 326    
 327 
 328         th =(struct tcphdr *)header;
 329         sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
 330 
 331         if (sk == NULL) 
 332                 return;
 333   
 334         if(err<0)
 335         {
 336                 sk->err = -err;
 337                 sk->error_report(sk);
 338                 return;
 339         }
 340 
 341         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 342         {
 343                 /*
 344                  * FIXME:
 345                  * For now we will just trigger a linear backoff.
 346                  * The slow start code should cause a real backoff here.
 347                  */
 348                 if (sk->cong_window > 4)
 349                         sk->cong_window--;
 350                 return;
 351         }
 352 
 353 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 354 
 355         /*
 356          * If we've already connected we will keep trying
 357          * until we time out, or the user gives up.
 358          */
 359 
 360         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 361         {
 362                 if (sk->state == TCP_SYN_SENT) 
 363                 {
 364                         tcp_statistics.TcpAttemptFails++;
 365                         tcp_set_state(sk,TCP_CLOSE);
 366                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 367                 }
 368                 sk->err = icmp_err_convert[err & 0xff].errno;           
 369         }
 370         return;
 371 }
 372 
 373 
 374 /*
 375  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 376  *      in the received data queue (ie a frame missing that needs sending to us)
 377  */
/*
 *      sk      socket to inspect (may be NULL)
 *
 *      Returns 0 when sk is NULL or its receive queue is empty, otherwise
 *      the byte count accumulated in 'amount'.  The queue walk runs
 *      between cli() and restore_flags().
 *
 *      NOTE(review): 'amount' is an unsigned long and is decremented for
 *      SYN and URG segments without a check for zero, so it looks as if it
 *      could wrap; the value is then narrowed to int on return -- confirm
 *      whether that combination can actually occur.
 */
 378 
 379 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 380 {
 381         unsigned long counted;
 382         unsigned long amount;
 383         struct sk_buff *skb;
 384         int sum;
 385         unsigned long flags;
 386 
 387         if(sk && sk->debug)
 388                 printk("tcp_readable: %p - ",sk);
 389 
 390         save_flags(flags);
 391         cli();
 392         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 393         {
 394                 restore_flags(flags);
 395                 if(sk && sk->debug) 
 396                         printk("empty\n");
 397                 return(0);
 398         }
 399   
 400         counted = sk->copied_seq+1;     /* Where we are at the moment */
 401         amount = 0;
 402   
 403         /* Do until a push or until we are out of data. */
 404         do 
 405         {
 406                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 407                         break;
 408                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
                /* A SYN adds one to sum here; it is taken off 'amount' again below. */
 409                 if (skb->h.th->syn)
 410                         sum++;
                /* sum <= 0 means this buffer adds nothing beyond 'counted'. */
 411                 if (sum > 0) 
 412                 {                                       /* Add it up, move on */
 413                         amount += sum;
 414                         if (skb->h.th->syn) 
 415                                 amount--;
 416                         counted += sum;
 417                 }
 418                 /*
 419                  * Don't count urg data ... but do it in the right place!
 420                  * Consider: "old_data (ptr is here) URG PUSH data"
 421                  * The old code would stop at the first push because
 422                  * it counted the urg (amount==1) and then does amount--
 423                  * *after* the loop.  This means tcp_readable() always
 424                  * returned zero if any URG PUSH was in the queue, even
 425                  * though there was normal data available. If we subtract
 426                  * the urg data right here, we even get it to work for more
 427                  * than one URG PUSH skb without normal data.
 428                  * This means that select() finally works now with urg data
 429                  * in the queue.  Note that rlogin was never affected
 430                  * because it doesn't use select(); it uses two processes
 431                  * and a blocking read().  And the queue scan in tcp_read()
 432                  * was correct.  Mike <pall@rz.uni-karlsruhe.de>
 433                  */
 434                 if (skb->h.th->urg)
 435                         amount--;       /* don't count urg data */
                /* A PSH ends the count only once something has been counted. */
 436                 if (amount && skb->h.th->psh) break;
 437                 skb = skb->next;
 438         }
        /* The queue head itself marks the end of the list. */
 439         while(skb != (struct sk_buff *)&sk->receive_queue);
 440 
 441         restore_flags(flags);
 442         if(sk->debug)
 443                 printk("got %lu bytes.\n",amount);
 444         return(amount);
 445 }
 446 
 447 
 448 /*
 449  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 450  *      listening socket has a receive queue of sockets to accept.
 451  */
 452 
 453 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 454 {
 455         sk->inuse = 1;
 456 
 457         switch(sel_type) 
 458         {
 459                 case SEL_IN:
 460                         select_wait(sk->sleep, wait);
 461                         if (skb_peek(&sk->receive_queue) != NULL) 
 462                         {
 463                                 if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) 
 464                                 {
 465                                         release_sock(sk);
 466                                         return(1);
 467                                 }
 468                         }
 469                         if (sk->err != 0)       /* Receiver error */
 470                         {
 471                                 release_sock(sk);
 472                                 return(1);
 473                         }
 474                         if (sk->shutdown & RCV_SHUTDOWN) 
 475                         {
 476                                 release_sock(sk);
 477                                 return(1);
 478                         } 
 479                         release_sock(sk);
 480                         return(0);
 481                 case SEL_OUT:
 482                         select_wait(sk->sleep, wait);
 483                         if (sk->shutdown & SEND_SHUTDOWN) 
 484                         {
 485                                 /* FIXME: should this return an error? */
 486                                 release_sock(sk);
 487                                 return(0);
 488                         }
 489 
 490                         /*
 491                          * This is now right thanks to a small fix
 492                          * by Matt Dillon.
 493                          */
 494                         
 495                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 496                         {
 497                                 release_sock(sk);
 498                                 /* This should cause connect to work ok. */
 499                                 if (sk->state == TCP_SYN_RECV ||
 500                                     sk->state == TCP_SYN_SENT) return(0);
 501                                 return(1);
 502                         }
 503                         release_sock(sk);
 504                         return(0);
 505                 case SEL_EX:
 506                         select_wait(sk->sleep,wait);
 507                         if (sk->err || sk->urg_data) 
 508                         {
 509                                 release_sock(sk);
 510                                 return(1);
 511                         }
 512                         release_sock(sk);
 513                         return(0);
 514         }
 515 
 516         release_sock(sk);
 517         return(0);
 518 }
 519 
 520 
 521 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 522 {
 523         int err;
 524         switch(cmd) 
 525         {
 526 
 527                 case TIOCINQ:
 528 #ifdef FIXME    /* FIXME: */
 529                 case FIONREAD:
 530 #endif
 531                 {
 532                         unsigned long amount;
 533 
 534                         if (sk->state == TCP_LISTEN) 
 535                                 return(-EINVAL);
 536 
 537                         sk->inuse = 1;
 538                         amount = tcp_readable(sk);
 539                         release_sock(sk);
 540                         err=verify_area(VERIFY_WRITE,(void *)arg,
 541                                                    sizeof(unsigned long));
 542                         if(err)
 543                                 return err;
 544                         put_fs_long(amount,(unsigned long *)arg);
 545                         return(0);
 546                 }
 547                 case SIOCATMARK:
 548                 {
 549                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 550 
 551                         err = verify_area(VERIFY_WRITE,(void *) arg,
 552                                                   sizeof(unsigned long));
 553                         if (err)
 554                                 return err;
 555                         put_fs_long(answ,(int *) arg);
 556                         return(0);
 557                 }
 558                 case TIOCOUTQ:
 559                 {
 560                         unsigned long amount;
 561 
 562                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 563                         amount = sk->prot->wspace(sk);
 564                         err=verify_area(VERIFY_WRITE,(void *)arg,
 565                                                    sizeof(unsigned long));
 566                         if(err)
 567                                 return err;
 568                         put_fs_long(amount,(unsigned long *)arg);
 569                         return(0);
 570                 }
 571                 default:
 572                         return(-EINVAL);
 573         }
 574 }
 575 
 576 
 577 /*
 578  *      This routine computes a TCP checksum. 
 579  */
/*
 *      th      start of the bytes to sum (the TCP header)
 *      len     number of bytes to sum starting at th
 *      saddr   source address; 0 means "use ip_my_addr()"
 *      daddr   destination address
 *
 *      Returns the complement of the folded sum, masked to 16 bits.
 *      The summing is done with i386 inline assembly.
 */
 580  
 581 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 582           unsigned long saddr, unsigned long daddr)
 583 {     
 584         unsigned long sum;
 585    
 586         if (saddr == 0) saddr = ip_my_addr();
 587 
 588 /*
 589  * stupid, gcc complains when I use just one __asm__ block,
 590  * something about too many reloads, but this is just two
 591  * instructions longer than what I want
 592  */
        /*
         * First block: start from daddr and add saddr and the word
         * (ntohs(len) << 16) + IPPROTO_TCP*256, folding the carry back in.
         */
 593         __asm__("
 594             addl %%ecx, %%ebx
 595             adcl %%edx, %%ebx
 596             adcl $0, %%ebx
 597             "
 598         : "=b"(sum)
 599         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 600         : "bx", "cx", "dx" );
        /*
         * Second block: add the len bytes at th to the running sum.
         * Label 1 is a loop unrolled to eight 32-bit loads (32 bytes) per
         * pass, label 3 handles the remaining whole 32-bit words, then a
         * trailing 16-bit word and a trailing byte are added if len has
         * those bits set.  From label 6 the upper 16 bits are folded into
         * the lower 16.
         */
 601         __asm__("
 602             movl %%ecx, %%edx
 603             cld
 604             cmpl $32, %%ecx
 605             jb 2f
 606             shrl $5, %%ecx
 607             clc
 608 1:          lodsl
 609             adcl %%eax, %%ebx
 610             lodsl
 611             adcl %%eax, %%ebx
 612             lodsl
 613             adcl %%eax, %%ebx
 614             lodsl
 615             adcl %%eax, %%ebx
 616             lodsl
 617             adcl %%eax, %%ebx
 618             lodsl
 619             adcl %%eax, %%ebx
 620             lodsl
 621             adcl %%eax, %%ebx
 622             lodsl
 623             adcl %%eax, %%ebx
 624             loop 1b
 625             adcl $0, %%ebx
 626             movl %%edx, %%ecx
 627 2:          andl $28, %%ecx
 628             je 4f
 629             shrl $2, %%ecx
 630             clc
 631 3:          lodsl
 632             adcl %%eax, %%ebx
 633             loop 3b
 634             adcl $0, %%ebx
 635 4:          movl $0, %%eax
 636             testw $2, %%dx
 637             je 5f
 638             lodsw
 639             addl %%eax, %%ebx
 640             adcl $0, %%ebx
 641             movw $0, %%ax
 642 5:          test $1, %%edx
 643             je 6f
 644             lodsb
 645             addl %%eax, %%ebx
 646             adcl $0, %%ebx
 647 6:          movl %%ebx, %%eax
 648             shrl $16, %%eax
 649             addw %%ax, %%bx
 650             adcw $0, %%bx
 651             "
 652         : "=b"(sum)
 653         : "0"(sum), "c"(len), "S"(th)
 654         : "ax", "bx", "cx", "dx", "si" );
 655 
 656         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 657   
 658         return((~sum) & 0xffff);
 659 }
 660 
 661 
 662 
 663 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 664                 unsigned long daddr, int len, struct sock *sk)
 665 {
 666         th->check = 0;
 667         th->check = tcp_check(th, len, saddr, daddr);
 668         return;
 669 }
 670 
 671 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 672 {
 673         int size;
 674         struct tcphdr * th = skb->h.th;
 675 
 676         /* length of packet (not counting length of pre-tcp headers) */
 677         size = skb->len - ((unsigned char *) th - skb->data);
 678 
 679         /* sanity check it.. */
 680         if (size < sizeof(struct tcphdr) || size > skb->len) 
 681         {
 682                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 683                         skb, skb->data, th, skb->len);
 684                 kfree_skb(skb, FREE_WRITE);
 685                 return;
 686         }
 687 
 688         /* If we have queued a header size packet.. */
 689         if (size == sizeof(struct tcphdr)) 
 690         {
 691                 /* If its got a syn or fin its notionally included in the size..*/
 692                 if(!th->syn && !th->fin) 
 693                 {
 694                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 695                         kfree_skb(skb,FREE_WRITE);
 696                         return;
 697                 }
 698         }
 699 
 700         tcp_statistics.TcpOutSegs++;  
 701 
 702         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 703         if (after(skb->h.seq, sk->window_seq) ||
 704             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 705              sk->packets_out >= sk->cong_window) 
 706         {
 707                 /* checksum will be supplied by tcp_write_xmit.  So
 708                  * we shouldn't need to set it at all.  I'm being paranoid */
 709                 th->check = 0;
 710                 if (skb->next != NULL) 
 711                 {
 712                         printk("tcp_send_partial: next != NULL\n");
 713                         skb_unlink(skb);
 714                 }
 715                 skb_queue_tail(&sk->write_queue, skb);
 716                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 717                     sk->send_head == NULL &&
 718                     sk->ack_backlog == 0)
 719                         reset_timer(sk, TIME_PROBE0, sk->rto);
 720         } 
 721         else 
 722         {
 723                 th->ack_seq = ntohl(sk->acked_seq);
 724                 th->window = ntohs(tcp_select_window(sk));
 725 
 726                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 727 
 728                 sk->sent_seq = sk->write_seq;
 729                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 730         }
 731 }
 732 
 733 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 734 {
 735         struct sk_buff * skb;
 736         unsigned long flags;
 737 
 738         save_flags(flags);
 739         cli();
 740         skb = sk->partial;
 741         if (skb) {
 742                 sk->partial = NULL;
 743                 del_timer(&sk->partial_timer);
 744         }
 745         restore_flags(flags);
 746         return skb;
 747 }
 748 
 749 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 750 {
 751         struct sk_buff *skb;
 752 
 753         if (sk == NULL)
 754                 return;
 755         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 756                 tcp_send_skb(sk, skb);
 757 }
 758 
 759 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 760 {
 761         struct sk_buff * tmp;
 762         unsigned long flags;
 763 
 764         save_flags(flags);
 765         cli();
 766         tmp = sk->partial;
 767         if (tmp)
 768                 del_timer(&sk->partial_timer);
 769         sk->partial = skb;
 770         init_timer(&sk->partial_timer);
 771         sk->partial_timer.expires = HZ;
 772         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 773         sk->partial_timer.data = (unsigned long) sk;
 774         add_timer(&sk->partial_timer);
 775         restore_flags(flags);
 776         if (tmp)
 777                 tcp_send_skb(sk, tmp);
 778 }
 779 
 780 
 781 /*
 782  *      This routine sends an ack and also updates the window. 
      *
      *      sequence  value stored in the seq field of the ack
      *      ack       value stored in the ack_seq field of the ack
      *      sk        socket the ack is sent for; nothing is sent once
      *                sk->zapped is set
      *      th        header of the segment being answered; it is copied
      *                as a template and its source/dest ports are swapped
      *      daddr     destination address passed to build_header() and
      *                to the checksum
      *
      *      If no buffer can be allocated, sk->ack_backlog is incremented
      *      and no ack is sent from here.  If build_header() fails the
      *      buffer is freed and nothing is sent.
 783  */
 784  
 785 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 786              struct sock *sk,
 787              struct tcphdr *th, unsigned long daddr)
 788 {
 789         struct sk_buff *buff;
 790         struct tcphdr *t1;
 791         struct device *dev = NULL;
 792         int tmp;
 793 
 794         if(sk->zapped)
 795                 return;         /* We have been reset, we may not send again */
 796         /*
 797          * We need to grab some memory, and put together an ack,
 798          * and then put it into the queue to be sent.
 799          */
 800 
 801         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 802         if (buff == NULL) 
 803         {
 804                 /* Force it to send an ack. */
 805                 sk->ack_backlog++;
                     /* Arm a short TIME_WRITE timer unless one is already set. */
 806                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 807                 {
 808                         reset_timer(sk, TIME_WRITE, 10);
 809                 }
 810                 return;
 811         }
 812 
 813         buff->len = sizeof(struct tcphdr);
 814         buff->sk = sk;
 815         buff->localroute = sk->localroute;
 816         t1 =(struct tcphdr *) buff->data;
 817 
 818         /* Put in the IP header and routing stuff. */
 819         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 820                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 821         if (tmp < 0) 
 822         {
 823                 buff->free=1;
 824                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 825                 return;
 826         }
             /* tmp is the number of bytes build_header() put in front of the TCP header. */
 827         buff->len += tmp;
 828         t1 =(struct tcphdr *)((char *)t1 +tmp);
 829 
 830         /* FIXME: */
 831         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 832 
 833         /*
 834          *      Swap the send and the receive. 
 835          */
 836          
 837         t1->dest = th->source;
 838         t1->source = th->dest;
             /*
              * NOTE(review): ntohl()/ntohs() are used below to store values
              * into the header; presumably htonl()/htons() is what is
              * meant (tcp_build_header() uses those) - confirm.
              */
 839         t1->seq = ntohl(sequence);
 840         t1->ack = 1;
 841         sk->window = tcp_select_window(sk);
 842         t1->window = ntohs(sk->window);
             /* Clear every flag inherited from the copied header except ack. */
 843         t1->res1 = 0;
 844         t1->res2 = 0;
 845         t1->rst = 0;
 846         t1->urg = 0;
 847         t1->syn = 0;
 848         t1->psh = 0;
 849         t1->fin = 0;
             /*
              * The value being acked matches sk->acked_seq, so the pending-ack
              * bookkeeping is reset.  If nothing is waiting to be sent and the
              * running timer is TIME_WRITE, it is replaced by the keepalive
              * timer (sk->keepopen set) or deleted.
              */
 850         if (ack == sk->acked_seq) 
 851         {
 852                 sk->ack_backlog = 0;
 853                 sk->bytes_rcv = 0;
 854                 sk->ack_timed = 0;
 855                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 856                                   && sk->timeout == TIME_WRITE) 
 857                 {
 858                         if(sk->keepopen) {
 859                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 860                         } else {
 861                                 delete_timer(sk);
 862                         }
 863                 }
 864         }
 865         t1->ack_seq = ntohl(ack);
             /* Header length in 32-bit words. */
 866         t1->doff = sizeof(*t1)/4;
 867         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 868         if (sk->debug)
 869                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 870         tcp_statistics.TcpOutSegs++;
 871         sk->prot->queue_xmit(sk, dev, buff, 1);
 872 }
 873 
 874 
 875 /* 
 876  *      This routine builds a generic TCP header. 
 877  */
 878  
 879 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 880 {
 881 
 882         /* FIXME: want to get rid of this. */
 883         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 884         th->seq = htonl(sk->write_seq);
 885         th->psh =(push == 0) ? 1 : 0;
 886         th->doff = sizeof(*th)/4;
 887         th->ack = 1;
 888         th->fin = 0;
 889         sk->ack_backlog = 0;
 890         sk->bytes_rcv = 0;
 891         sk->ack_timed = 0;
 892         th->ack_seq = htonl(sk->acked_seq);
 893         sk->window = tcp_select_window(sk);
 894         th->window = htons(sk->window);
 895 
 896         return(sizeof(*th));
 897 }
 898 
 899 /*
 900  *      This routine copies from a user buffer into a socket,
 901  *      and starts the transmit system.
      *
      *      sk        socket to write to
      *      from      source buffer; read with memcpy_fromfs()
      *      len       number of bytes to write
      *      nonblock  when set, return -EAGAIN instead of sleeping
      *      flags     MSG_OOB marks the data urgent, MSG_DONTROUTE is
      *                or'ed into skb->localroute
      *
      *      Returns the number of bytes copied.  When nothing was copied
      *      a negative error is returned instead: -sk->err, -EPIPE,
      *      -EAGAIN, -ERESTARTSYS or the failure code of build_header().
      *
      *      Each pass of the main loop either tops up the stored partial
      *      packet or builds one new segment; the segment is then sent
      *      with tcp_send_skb() or parked with tcp_enqueue_partial().
 902  */
 903 
 904 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 905           int len, int nonblock, unsigned flags)
 906 {
 907         int copied = 0;
 908         int copy;
 909         int tmp;
 910         struct sk_buff *skb;
 911         struct sk_buff *send_tmp;
 912         unsigned char *buff;
 913         struct proto *prot;
 914         struct device *dev = NULL;
 915 
 916         sk->inuse=1;
 917         prot = sk->prot;
 918         while(len > 0) 
 919         {
 920                 if (sk->err) 
 921                 {                       /* Stop on an error */
 922                         release_sock(sk);
 923                         if (copied) 
 924                                 return(copied);
 925                         tmp = -sk->err;
 926                         sk->err = 0;
 927                         return(tmp);
 928                 }
 929 
 930         /*
 931          *      First thing we do is make sure that we are established. 
 932          */
 933         
 934                 if (sk->shutdown & SEND_SHUTDOWN) 
 935                 {
                             /*
                              * NOTE(review): sk->err is written after
                              * release_sock() and stays EPIPE when some data
                              * was already copied - confirm this is intended.
                              */
 936                         release_sock(sk);
 937                         sk->err = EPIPE;
 938                         if (copied) 
 939                                 return(copied);
 940                         sk->err = 0;
 941                         return(-EPIPE);
 942                 }
 943 
 944 
 945         /* 
 946          *      Wait for a connection to finish.
 947          */
 948         
 949                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 950                 {
 951                         if (sk->err) 
 952                         {
 953                                 release_sock(sk);
 954                                 if (copied) 
 955                                         return(copied);
 956                                 tmp = -sk->err;
 957                                 sk->err = 0;
 958                                 return(tmp);
 959                         }
 960 
                             /* Any state other than a handshake in progress cannot become writable. */
 961                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 962                         {
 963                                 release_sock(sk);
 964                                 if (copied) 
 965                                         return(copied);
 966 
 967                                 if (sk->err) 
 968                                 {
 969                                         tmp = -sk->err;
 970                                         sk->err = 0;
 971                                         return(tmp);
 972                                 }
 973 
 974                                 if (sk->keepopen) 
 975                                 {
 976                                         send_sig(SIGPIPE, current, 0);
 977                                 }
 978                                 return(-EPIPE);
 979                         }
 980 
 981                         if (nonblock || copied) 
 982                         {
 983                                 release_sock(sk);
 984                                 if (copied) 
 985                                         return(copied);
 986                                 return(-EAGAIN);
 987                         }
 988 
 989                         release_sock(sk);
 990                         cli();
 991                 
                             /* Re-test with interrupts off before sleeping. */
 992                         if (sk->state != TCP_ESTABLISHED &&
 993                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 994                         {
 995                                 interruptible_sleep_on(sk->sleep);
 996                                 if (current->signal & ~current->blocked) 
 997                                 {
 998                                         sti();
 999                                         if (copied) 
1000                                                 return(copied);
1001                                         return(-ERESTARTSYS);
1002                                 }
1003                         }
1004                         sk->inuse = 1;
1005                         sti();
1006                 }
1007 
1008         /*
1009          * The following code can result in copy <= 0 if sk->mss is ever
1010          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1011          * sk->mtu is constant once SYN processing is finished.  I.e. we
1012          * had better not get here until we've seen his SYN and at least one
1013          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1014          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1015          * non-decreasing.  Note that any ioctl to set user_mss must be done
1016          * before the exchange of SYN's.  If the initial ack from the other
1017          * end has a window of 0, max_window and thus mss will both be 0.
1018          */
1019 
1020         /* 
1021          *      Now we need to check if we have a half built packet. 
1022          */
1023 
1024                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1025                 {
1026                         int hdrlen;
1027 
1028                          /* IP header + TCP header */
1029                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1030                                  + sizeof(struct tcphdr);
1031         
1032                         /* Add more stuff to the end of skb->len */
1033                         if (!(flags & MSG_OOB)) 
1034                         {
                                     /* Room left up to one mss of payload, limited by what the caller has. */
1035                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1036                                 /* FIXME: this is really a bug. */
1037                                 if (copy <= 0) 
1038                                 {
1039                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1040                                         copy = 0;
1041                                 }
1042           
1043                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1044                                 skb->len += copy;
1045                                 from += copy;
1046                                 copied += copy;
1047                                 len -= copy;
1048                                 sk->write_seq += copy;
1049                         }
                             /*
                              * Send now if the packet is full, if urgent data is
                              * being written, or if nothing is in flight;
                              * otherwise keep it as the partial packet.
                              */
1050                         if ((skb->len - hdrlen) >= sk->mss ||
1051                                 (flags & MSG_OOB) || !sk->packets_out)
1052                                 tcp_send_skb(sk, skb);
1053                         else
1054                                 tcp_enqueue_partial(skb, sk);
1055                         continue;
1056                 }
1057 
1058         /*
1059          * We also need to worry about the window.
1060          * If window < 1/2 the maximum window we've seen from this
1061          *   host, don't use it.  This is sender side
1062          *   silly window prevention, as specified in RFC1122.
1063          *   (Note that this is different than earlier versions of
1064          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1065          *   use the whole MSS.  Since this results in the right
1066          *   edge of the packet being outside the window, it will
1067          *   be queued for later rather than sent.
1068          */
1069 
1070                 copy = sk->window_seq - sk->write_seq;
1071                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1072                         copy = sk->mss;
1073                 if (copy > len)
1074                         copy = len;
1075 
1076         /*
1077          *      We should really check the window here also. 
1078          */
1079          
                     /*
                      * A segment shorter than one mss (and not urgent) gets a
                      * buffer big enough to be topped up later; send_tmp
                      * remembers it as a candidate partial packet.
                      */
1080                 send_tmp = NULL;
1081                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1082                 {
1083                         /*
1084                          *      We will release the socket in case we sleep here. 
1085                          */
1086                         release_sock(sk);
1087                         /*
1088                          *      NB: following must be mtu, because mss can be increased.
1089                          *      mss is always <= mtu 
1090                          */
1091                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1092                         sk->inuse = 1;
1093                         send_tmp = skb;
1094                 } 
1095                 else 
1096                 {
1097                         /*
1098                          *      We will release the socket in case we sleep here. 
1099                          */
1100                         release_sock(sk);
1101                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1102                         sk->inuse = 1;
1103                 }
1104 
1105                 /*
1106                  *      If we didn't get any memory, we need to sleep. 
1107                  */
1108 
1109                 if (skb == NULL) 
1110                 {
1111                         if (nonblock) 
1112                         {
1113                                 release_sock(sk);
1114                                 if (copied) 
1115                                         return(copied);
1116                                 return(-EAGAIN);
1117                         }
1118 
1119                         /*
1120                          *      FIXME: here is another race condition. 
1121                          */
1122 
1123                         tmp = sk->wmem_alloc;
1124                         release_sock(sk);
1125                         cli();
1126                         /*
1127                          *      Again we will try to avoid it. 
1128                          */
                             /* Sleep only if no write memory was released since the sample above. */
1129                         if (tmp <= sk->wmem_alloc &&
1130                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1131                                 && sk->err == 0) 
1132                         {
1133                                 interruptible_sleep_on(sk->sleep);
1134                                 if (current->signal & ~current->blocked) 
1135                                 {
1136                                         sti();
1137                                         if (copied) 
1138                                                 return(copied);
1139                                         return(-ERESTARTSYS);
1140                                 }
1141                         }
1142                         sk->inuse = 1;
1143                         sti();
1144                         continue;
1145                 }
1146 
1147                 skb->len = 0;
1148                 skb->sk = sk;
1149                 skb->free = 0;
1150                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1151         
1152                 buff = skb->data;
1153         
1154                 /*
1155                  * FIXME: we need to optimize this.
1156                  * Perhaps some hints here would be good.
1157                  */
1158                 
1159                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1160                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1161                 if (tmp < 0 ) 
1162                 {
1163                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1164                         release_sock(sk);
1165                         if (copied) 
1166                                 return(copied);
1167                         return(tmp);
1168                 }
1169                 skb->len += tmp;
1170                 skb->dev = dev;
1171                 buff += tmp;
1172                 skb->h.th =(struct tcphdr *) buff;
                     /* len-copy is 0 for the last segment of this write, which makes tcp_build_header() set PSH. */
1173                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
                     /* NOTE(review): tcp_build_header() always returns sizeof(struct tcphdr), so this branch cannot be taken. */
1174                 if (tmp < 0) 
1175                 {
1176                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1177                         release_sock(sk);
1178                         if (copied) 
1179                                 return(copied);
1180                         return(tmp);
1181                 }
1182 
1183                 if (flags & MSG_OOB) 
1184                 {
                             /* NOTE(review): ntohs() stores a host value here; presumably htons() is meant - confirm. */
1185                         ((struct tcphdr *)buff)->urg = 1;
1186                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1187                 }
1188                 skb->len += tmp;
1189                 memcpy_fromfs(buff+tmp, from, copy);
1190 
1191                 from += copy;
1192                 copied += copy;
1193                 len -= copy;
1194                 skb->len += copy;
1195                 skb->free = 0;
1196                 sk->write_seq += copy;
1197         
                     /* A short segment is held back while other packets are still in flight. */
1198                 if (send_tmp != NULL && sk->packets_out) 
1199                 {
1200                         tcp_enqueue_partial(send_tmp, sk);
1201                         continue;
1202                 }
1203                 tcp_send_skb(sk, skb);
1204         }
1205         sk->err = 0;
1206 
1207 /*
1208  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1209  *      interactive fast network servers. It's meant to be on and
1210  *      it really improves the throughput though not the echo time
1211  *      on my slow slip link - Alan
1212  */
1213 
1214 /*
1215  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1216  */
1217  
1218         if(sk->partial && ((!sk->packets_out) 
1219      /* If not nagling we can send on the before case too.. */
1220               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1221         ))
1222                 tcp_send_partial(sk);
1223 
1224         release_sock(sk);
1225         return(copied);
1226 }
1227 
1228 
1229 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1230            int len, int nonblock, unsigned flags,
1231            struct sockaddr_in *addr, int addr_len)
1232 {
1233         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1234                 return -EINVAL;
1235         if (sk->state == TCP_CLOSE)
1236                 return -ENOTCONN;
1237         if (addr_len < sizeof(*addr))
1238                 return -EINVAL;
1239         if (addr->sin_family && addr->sin_family != AF_INET) 
1240                 return -EINVAL;
1241         if (addr->sin_port != sk->dummy_th.dest) 
1242                 return -EISCONN;
1243         if (addr->sin_addr.s_addr != sk->daddr) 
1244                 return -EISCONN;
1245         return tcp_write(sk, from, len, nonblock, flags);
1246 }
1247 
1248 
1249 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1250 {
1251         int tmp;
1252         struct device *dev = NULL;
1253         struct tcphdr *t1;
1254         struct sk_buff *buff;
1255 
1256         if (!sk->ack_backlog) 
1257                 return;
1258 
1259         /*
1260          * FIXME: we need to put code here to prevent this routine from
1261          * being called.  Being called once in a while is ok, so only check
1262          * if this is the second time in a row.
1263          */
1264 
1265         /*
1266          * We need to grab some memory, and put together an ack,
1267          * and then put it into the queue to be sent.
1268          */
1269 
1270         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1271         if (buff == NULL) 
1272         {
1273                 /* Try again real soon. */
1274                 reset_timer(sk, TIME_WRITE, 10);
1275                 return;
1276         }
1277 
1278         buff->len = sizeof(struct tcphdr);
1279         buff->sk = sk;
1280         buff->localroute = sk->localroute;
1281         
1282         /*
1283          *      Put in the IP header and routing stuff. 
1284          */
1285 
1286         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1287                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1288         if (tmp < 0) 
1289         {
1290                 buff->free=1;
1291                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1292                 return;
1293         }
1294 
1295         buff->len += tmp;
1296         t1 =(struct tcphdr *)(buff->data +tmp);
1297 
1298         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1299         t1->seq = htonl(sk->sent_seq);
1300         t1->ack = 1;
1301         t1->res1 = 0;
1302         t1->res2 = 0;
1303         t1->rst = 0;
1304         t1->urg = 0;
1305         t1->syn = 0;
1306         t1->psh = 0;
1307         sk->ack_backlog = 0;
1308         sk->bytes_rcv = 0;
1309         sk->window = tcp_select_window(sk);
1310         t1->window = ntohs(sk->window);
1311         t1->ack_seq = ntohl(sk->acked_seq);
1312         t1->doff = sizeof(*t1)/4;
1313         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1314         sk->prot->queue_xmit(sk, dev, buff, 1);
1315         tcp_statistics.TcpOutSegs++;
1316 }
1317 
1318 
1319 /*
1320  *      FIXME:
1321  *      This routine frees used buffers.
1322  *      It should consider sending an ACK to let the
1323  *      other end know we now have a bigger window.
1324  */
1325 
1326 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1327 {
1328         unsigned long flags;
1329         unsigned long left;
1330         struct sk_buff *skb;
1331         unsigned long rspace;
1332 
1333         if(sk->debug)
1334                 printk("cleaning rbuf for sk=%p\n", sk);
1335   
1336         save_flags(flags);
1337         cli();
1338   
1339         left = sk->prot->rspace(sk);
1340  
1341         /*
1342          * We have to loop through all the buffer headers,
1343          * and try to free up all the space we can.
1344          */
1345 
1346         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1347         {
1348                 if (!skb->used) 
1349                         break;
1350                 skb_unlink(skb);
1351                 skb->sk = sk;
1352                 kfree_skb(skb, FREE_READ);
1353         }
1354 
1355         restore_flags(flags);
1356 
1357         /*
1358          * FIXME:
1359          * At this point we should send an ack if the difference
1360          * in the window, and the amount of space is bigger than
1361          * TCP_WINDOW_DIFF.
1362          */
1363 
1364         if(sk->debug)
1365                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1366                                             left);
1367         if ((rspace=sk->prot->rspace(sk)) != left) 
1368         {
1369                 /*
1370                  * This area has caused the most trouble.  The current strategy
1371                  * is to simply do nothing if the other end has room to send at
1372                  * least 3 full packets, because the ack from those will auto-
1373                  * matically update the window.  If the other end doesn't think
1374                  * we have much space left, but we have room for at least 1 more
1375                  * complete packet than it thinks we do, we will send an ack
1376                  * immediately.  Otherwise we will wait up to .5 seconds in case
1377                  * the user reads some more.
1378                  */
1379                 sk->ack_backlog++;
1380         /*
1381          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1382          * if the other end is offering a window smaller than the agreed on MSS
1383          * (called sk->mtu here).  In theory there's no connection between send
1384          * and receive, and so no reason to think that they're going to send
1385          * small packets.  For the moment I'm using the hack of reducing the mss
1386          * only on the send side, so I'm putting mtu here.
1387          */
1388 
1389                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1390                 {
1391                         /* Send an ack right now. */
1392                         tcp_read_wakeup(sk);
1393                 } 
1394                 else 
1395                 {
1396                         /* Force it to send an ack soon. */
1397                         int was_active = del_timer(&sk->timer);
1398                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1399                         {
1400                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1401                         } 
1402                         else
1403                                 add_timer(&sk->timer);
1404                 }
1405         }
1406 } 
1407 
1408 
1409 /*
1410  *      Handle reading urgent data. 
1411  */
1412  
1413 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1414              unsigned char *to, int len, unsigned flags)
1415 {
1416 #ifdef NOTDEF
1417         struct wait_queue wait = { current, NULL };
1418 #endif
1419 
1420         while (len > 0) 
1421         {
1422                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1423                         return -EINVAL;
1424                 sk->inuse = 1;
1425                 if (sk->urg_data & URG_VALID) 
1426                 {
1427                         char c = sk->urg_data;
1428                         if (!(flags & MSG_PEEK))
1429                                 sk->urg_data = URG_READ;
1430                         put_fs_byte(c, to);
1431                         release_sock(sk);
1432                         return 1;
1433                 }
1434 
1435                 release_sock(sk);
1436                 
1437                 if (sk->err) 
1438                 {
1439                         int tmp = -sk->err;
1440                         sk->err = 0;
1441                         return tmp;
1442                 }
1443 
1444                 if (sk->state == TCP_CLOSE || sk->done) 
1445                 {
1446                         if (!sk->done) {
1447                                 sk->done = 1;
1448                                 return 0;
1449                         }
1450                         return -ENOTCONN;
1451                 }
1452 
1453                 if (sk->shutdown & RCV_SHUTDOWN) 
1454                 {
1455                         sk->done = 1;
1456                         return 0;
1457                 }
1458 
1459                 /*
1460                  * Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
1461                  * the available implementations agree in this case:
1462                  * this call should never block, independent of the
1463                  * blocking state of the socket.
1464                  * Mike <pall@rz.uni-karlsruhe.de>
1465                  */
1466                 return -EAGAIN;
1467 #ifdef NOTDEF
1468                 /* remove the loop, if this dead code gets removed! */
1469                 if (nonblock)
1470                         return -EAGAIN;
1471 
1472                 if (current->signal & ~current->blocked)
1473                         return -ERESTARTSYS;
1474 
1475                 current->state = TASK_INTERRUPTIBLE;
1476                 add_wait_queue(sk->sleep, &wait);
1477                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1478                     !(sk->shutdown & RCV_SHUTDOWN))
1479                         schedule();
1480                 remove_wait_queue(sk->sleep, &wait);
1481                 current->state = TASK_RUNNING;
1482 #endif
1483         }
1484         return 0;
1485 }
1486 
1487 
1488 /*
1489  *      This routine copies from a sock struct into the user buffer. 
      *
      *      sk       - socket to read from (data is taken off sk->receive_queue)
      *      to       - destination buffer, filled with memcpy_tofs()
      *      len      - maximum number of bytes to copy
      *      nonblock - non-zero: return -EAGAIN rather than sleep for data
      *      flags    - MSG_OOB passes the call on to tcp_read_urg();
      *                 MSG_PEEK copies without advancing sk->copied_seq
      *
      *      Returns the number of bytes copied, 0 at end of stream (socket
      *      closed or shut down for receive), or a negative error:
      *      -ENOTCONN, -EAGAIN, -ERESTARTSYS or the negated sk->err
      *      (which is cleared once it has been reported).
1490  */
1491  
1492 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1493         int len, int nonblock, unsigned flags)
1494 {
1495         struct wait_queue wait = { current, NULL };
1496         int copied = 0;
1497         unsigned long peek_seq;
1498         unsigned long *seq;
1499         unsigned long used;
1500 
1501         /* This error should be checked. */
1502         if (sk->state == TCP_LISTEN)
1503                 return -ENOTCONN;
1504 
1505         /* Urgent data needs to be handled specially. */
1506         if (flags & MSG_OOB)
1507                 return tcp_read_urg(sk, nonblock, to, len, flags);
1508 
             /*
              * *seq is the read position.  Normally it is the socket's own
              * copied_seq; when peeking it is a private copy, so the socket's
              * position is left where it was.  The code below treats 1+*seq
              * as the sequence number of the next byte to hand out.
              */
1509         peek_seq = sk->copied_seq;
1510         seq = &sk->copied_seq;
1511         if (flags & MSG_PEEK)
1512                 seq = &peek_seq;
1513 
             /* Join the socket's wait queue once, up front, and claim the socket. */
1514         add_wait_queue(sk->sleep, &wait);
1515         sk->inuse = 1;
1516         while (len > 0) 
1517         {
1518                 struct sk_buff * skb;
1519                 unsigned long offset;
1520         
1521                 /*
1522                  * are we at urgent data? Stop if we have read anything.
1523                  */
1524                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1525                         break;
1526 
                     /*
                      * Marked interruptible before the queue is examined; the
                      * schedule() further down is only reached if nothing
                      * readable is found.
                      */
1527                 current->state = TASK_INTERRUPTIBLE;
1528 
                     /*
                      * Look for the buffer holding sequence 1+*seq.  The scan
                      * stops at a hole (a buffer that starts beyond that
                      * point).  A SYN takes a sequence number but no data
                      * byte, hence the offset--.  Buffers lying wholly before
                      * the read position are flagged used unless we are only
                      * peeking.
                      */
1529                 skb = skb_peek(&sk->receive_queue);
1530                 do 
1531                 {
1532                         if (!skb)
1533                                 break;
1534                         if (before(1+*seq, skb->h.th->seq))
1535                                 break;
1536                         offset = 1 + *seq - skb->h.th->seq;
1537                         if (skb->h.th->syn)
1538                                 offset--;
1539                         if (offset < skb->len)
1540                                 goto found_ok_skb;
1541                         if (!(flags & MSG_PEEK))
1542                                 skb->used = 1;
1543                         skb = skb->next;
1544                 }
1545                 while (skb != (struct sk_buff *)&sk->receive_queue);
1546 
                     /* Nothing to copy right now: hand back what we already have. */
1547                 if (copied)
1548                         break;
1549 
                     /* Report a pending socket error once, then clear it. */
1550                 if (sk->err) 
1551                 {
1552                         copied = -sk->err;
1553                         sk->err = 0;
1554                         break;
1555                 }
1556 
                     /*
                      * Closed socket: the first read returns 0 and sets
                      * sk->done, later reads fail with -ENOTCONN.
                      */
1557                 if (sk->state == TCP_CLOSE) 
1558                 {
1559                         if (!sk->done) 
1560                         {
1561                                 sk->done = 1;
1562                                 break;
1563                         }
1564                         copied = -ENOTCONN;
1565                         break;
1566                 }
1567 
                     /* Receive side shut down: return 0. */
1568                 if (sk->shutdown & RCV_SHUTDOWN) 
1569                 {
1570                         sk->done = 1;
1571                         break;
1572                 }
1573                         
1574                 if (nonblock) 
1575                 {
1576                         copied = -EAGAIN;
1577                         break;
1578                 }
1579 
                     /*
                      * Sleep until woken.  cleanup_rbuf() and release_sock()
                      * run before schedule(), and the socket is marked in use
                      * again as soon as we are back.
                      */
1580                 cleanup_rbuf(sk);
1581                 release_sock(sk);
1582                 schedule();
1583                 sk->inuse = 1;
1584 
                     /* A pending, unblocked signal interrupts the read. */
1585                 if (current->signal & ~current->blocked) 
1586                 {
1587                         copied = -ERESTARTSYS;
1588                         break;
1589                 }
1590                 continue;
1591 
1592         found_ok_skb:
1593                 /* Ok so how much can we use ? */
1594                 used = skb->len - offset;
1595                 if (len < used)
1596                         used = len;
1597                 /* do we have urgent data here? */
1598                 if (sk->urg_data) 
1599                 {
                             /*
                              * urg_offset is unsigned: an urgent byte behind the
                              * read position wraps to a huge value and fails the
                              * test below.  If the urgent byte is the very next
                              * one it is skipped (unless it is delivered inline);
                              * otherwise the copy stops just short of it.
                              */
1600                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1601                         if (urg_offset < used) 
1602                         {
1603                                 if (!urg_offset) 
1604                                 {
1605                                         if (!sk->urginline) 
1606                                         {
1607                                                 ++*seq;
1608                                                 offset++;
1609                                                 used--;
1610                                         }
1611                                 }
1612                                 else
1613                                         used = urg_offset;
1614                         }
1615                 }
1616                 /* Copy it */
                     /* The payload starts doff*4 bytes after the TCP header start. */
1617                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1618                         skb->h.th->doff*4 + offset, used);
1619                 copied += used;
1620                 len -= used;
1621                 to += used;
1622                 *seq += used;
                     /*
                      * This test looks at sk->copied_seq, not *seq, so moving
                      * the private peek position does not by itself clear the
                      * urgent state.
                      */
1623                 if (after(sk->copied_seq+1,sk->urg_seq))
1624                         sk->urg_data = 0;
                     /* Buffer fully consumed by a real (non-peek) read: flag it used. */
1625                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1626                         skb->used = 1;
1627         }
1628         remove_wait_queue(sk->sleep, &wait);
1629         current->state = TASK_RUNNING;
1630 
1631         /* Clean up data we have read: This will do ACK frames */
1632         cleanup_rbuf(sk);
1633         release_sock(sk);
1634         return copied;
1635 }
1636 
1637  
1638 /*
1639  *      Shutdown the sending side of a connection.
      *
      *      sk  - socket to shut down
      *      how - shutdown mask; nothing is done unless SEND_SHUTDOWN is set
      *
      *      Builds a FIN segment and either appends it to sk->write_queue
      *      (when data is still queued there) or transmits it at once, then
      *      moves ESTABLISHED to FIN_WAIT1, CLOSE_WAIT to LAST_ACK and any
      *      other state to FIN_WAIT2.  Returns early, doing nothing, when
      *      the current state shows that a FIN has already been sent.
1640  */
1641 
1642 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1643 {
1644         struct sk_buff *buff;
1645         struct tcphdr *t1, *th;
1646         struct proto *prot;
1647         int tmp;
1648         struct device *dev = NULL;
1649 
1650         /*
1651          * We need to grab some memory, and put together a FIN,
1652          * and then put it into the queue to be sent.
1653          * FIXME:
1654          *
1655          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1656          *      Most of this is guesswork, so maybe it will work...
1657          */
1658 
1659         if (!(how & SEND_SHUTDOWN)) 
1660                 return;
1661          
1662         /*
1663          *      If we've already sent a FIN, return. 
1664          */
1665          
1666         if (sk->state == TCP_FIN_WAIT1 ||
1667             sk->state == TCP_FIN_WAIT2 ||
1668             sk->state == TCP_CLOSING ||
1669             sk->state == TCP_LAST_ACK ||
1670             sk->state == TCP_TIME_WAIT
1671         ) 
1672         {
1673                 return;
1674         }
1675         sk->inuse = 1;
1676 
1677         /*
1678          * flag that the sender has shutdown
1679          */
1680 
1681         sk->shutdown |= SEND_SHUTDOWN;
1682 
1683         /*
1684          *  Clear out any half completed packets. 
1685          */
1686 
1687         if (sk->partial)
1688                 tcp_send_partial(sk);
1689 
1690         prot =(struct proto *)sk->prot;
1691         th =(struct tcphdr *)&sk->dummy_th;
1692         release_sock(sk); /* in case the malloc sleeps. */
1693         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
             /*
              * NOTE(review): on allocation failure we return with
              * SEND_SHUTDOWN already set, but no FIN has been built and the
              * connection state is unchanged.
              */
1694         if (buff == NULL)
1695                 return;
             /* Claim the socket again now that the allocation is done. */
1696         sk->inuse = 1;
1697 
1698         buff->sk = sk;
1699         buff->len = sizeof(*t1);
1700         buff->localroute = sk->localroute;
1701         t1 =(struct tcphdr *) buff->data;
1702 
1703         /*
1704          *      Put in the IP header and routing stuff. 
1705          */
1706 
1707         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1708                            IPPROTO_TCP, sk->opt,
1709                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1710         if (tmp < 0) 
1711         {
1712                 /*
1713                  *      Finish anyway, treat this as a send that got lost. 
1714                  *
1715                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1716                  *      written data to be completely acknowledged along
1717                  *      with an acknowledge to our FIN.
1718                  *
1719                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1720                  *      connection established.
1721                  */
1722                 buff->free=1;
1723                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1724 
                     /*
                      * NOTE(review): sk->write_seq is not advanced on this
                      * path, although the state moves on as if the FIN had
                      * been sent - confirm this is intended.
                      */
1725                 if (sk->state == TCP_ESTABLISHED)
1726                         tcp_set_state(sk,TCP_FIN_WAIT1);
1727                 else if(sk->state == TCP_CLOSE_WAIT)
1728                         tcp_set_state(sk,TCP_LAST_ACK);
1729                 else
1730                         tcp_set_state(sk,TCP_FIN_WAIT2);
1731 
1732                 release_sock(sk);
1733                 return;
1734         }
1735 
             /*
              * build_header() returned the number of bytes it placed in front
              * of the TCP header.  Start from the socket's dummy_th template
              * and fill in the FIN specifics.  write_seq is advanced by one
              * for the FIN, and buff->h.seq records the sequence number that
              * follows it.  ntohl()/ntohs() serve as host-to-network
              * conversions here.
              */
1736         t1 =(struct tcphdr *)((char *)t1 +tmp);
1737         buff->len += tmp;
1738         buff->dev = dev;
1739         memcpy(t1, th, sizeof(*t1));
1740         t1->seq = ntohl(sk->write_seq);
1741         sk->write_seq++;
1742         buff->h.seq = sk->write_seq;
1743         t1->ack = 1;
1744         t1->ack_seq = ntohl(sk->acked_seq);
1745         t1->window = ntohs(sk->window=tcp_select_window(sk));
1746         t1->fin = 1;
1747         t1->rst = 0;
1748         t1->doff = sizeof(*t1)/4;
1749         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1750 
1751         /*
1752          * If there is data in the write queue, the fin must be appended to
1753          * the write queue.
1754          */
1755         
1756         if (skb_peek(&sk->write_queue) != NULL) 
1757         {
1758                 buff->free=0;
                     /* A fresh buffer should not be on any list; complain and unlink if it is. */
1759                 if (buff->next != NULL) 
1760                 {
1761                         printk("tcp_shutdown: next != NULL\n");
1762                         skb_unlink(buff);
1763                 }
1764                 skb_queue_tail(&sk->write_queue, buff);
1765         } 
1766         else 
1767         {
                     /* Nothing queued: record the FIN as sent and transmit it now. */
1768                 sk->sent_seq = sk->write_seq;
1769                 sk->prot->queue_xmit(sk, dev, buff, 0);
1770         }
1771 
             /* Same state transitions as on the header-build failure path above. */
1772         if (sk->state == TCP_ESTABLISHED) 
1773                 tcp_set_state(sk,TCP_FIN_WAIT1);
1774         else if (sk->state == TCP_CLOSE_WAIT)
1775                 tcp_set_state(sk,TCP_LAST_ACK);
1776         else
1777                 tcp_set_state(sk,TCP_FIN_WAIT2);
1778 
1779         release_sock(sk);
1780 }
1781 
1782 /*
      *      Receive data from a TCP socket and optionally report the peer.
      *
      *      The read itself is done by tcp_read(); its result is returned
      *      unchanged.  *addr_len, when supplied, is set to the size of a
      *      sockaddr_in before the read is attempted.  *addr, when supplied,
      *      is filled with the peer's port and address from the socket, but
      *      only if the read did not fail.
      */
1783 static int
1784 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1785              int to_len, int nonblock, unsigned flags,
1786              struct sockaddr_in *addr, int *addr_len)
1787 {
1788         int nread;
1789 
1790         /*
1791          *      The length is reported first, so it is written even when
1792          *      the read below ends in an error.
1793          */
1794         if (addr_len)
1795                 *addr_len = sizeof(struct sockaddr_in);
1796 
1797         nread = tcp_read(sk, to, to_len, nonblock, flags);
1798 
1799         /*
1800          *      Fill in the peer's address only when the read succeeded
1801          *      and the caller gave us somewhere to put it.
1802          */
1803         if (nread >= 0 && addr)
1804         {
1805                 addr->sin_addr.s_addr = sk->daddr;
1806                 addr->sin_port = sk->dummy_th.dest;
1807                 addr->sin_family = AF_INET;
1808         }
1809         return nread;
1810 }
1811 
1812 
1813 /*
1814  *      This routine will send an RST to the other tcp. 
      *
      *      saddr, daddr  - addresses handed to build_header() and
      *                      tcp_send_check() for the outgoing reset
      *      th            - header of the segment being answered; it is
      *                      copied as a template and its ports are swapped
      *      prot          - protocol operations used to allocate, build,
      *                      free and queue the buffer
      *      opt, tos, ttl - passed through to build_header()
      *      dev           - stored in buff->dev; transmission uses the
      *                      device that build_header() hands back
      *
      *      Nothing is reported to the caller: if the buffer cannot be
      *      allocated or the header cannot be built, the reset is dropped.
1815  */
1816  
1817 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1818           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1819 {
1820         struct sk_buff *buff;
1821         struct tcphdr *t1;
1822         int tmp;
1823         struct device *ndev=NULL;
1824   
1825 /*
1826  * We need to grab some memory, and put together an RST,
1827  * and then put it into the queue to be sent.
1828  */
1829 
             /* No socket owns this buffer, and the allocation is atomic. */
1830         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1831         if (buff == NULL) 
1832                 return;
1833 
1834         buff->len = sizeof(*t1);
1835         buff->sk = NULL;
1836         buff->dev = dev;
1837         buff->localroute = 0;
1838 
1839         t1 =(struct tcphdr *) buff->data;
1840 
1841         /*
1842          *      Put in the IP header and routing stuff. 
1843          */
1844 
1845         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1846                            sizeof(struct tcphdr),tos,ttl);
1847         if (tmp < 0) 
1848         {
1849                 buff->free = 1;
1850                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1851                 return;
1852         }
1853 
             /*
              * The TCP header goes tmp bytes into the buffer, after whatever
              * build_header() wrote.  It starts out as a copy of the incoming
              * header; the fields below are then overridden.
              */
1854         t1 =(struct tcphdr *)((char *)t1 +tmp);
1855         buff->len += tmp;
1856         memcpy(t1, th, sizeof(*t1));
1857 
1858         /*
1859          *      Swap the send and the receive. 
1860          */
1861 
1862         t1->dest = th->source;
1863         t1->source = th->dest;
1864         t1->rst = 1;  
1865         t1->window = 0;
1866   
             /*
              * Sequence numbers: when the offending segment carried an ACK,
              * the reset takes its sequence number from that ack_seq field
              * and carries no ACK of its own.  Otherwise the reset has
              * sequence zero and acknowledges the incoming sequence number,
              * plus one if the segment was a SYN.
              * NOTE(review): htonl(th->seq) assumes th->seq is already in
              * host order, while th->ack_seq is copied unconverted - confirm
              * against the callers.
              */
1867         if(th->ack)
1868         {
1869                 t1->ack = 0;
1870                 t1->seq = th->ack_seq;
1871                 t1->ack_seq = 0;
1872         }
1873         else
1874         {
1875                 t1->ack = 1;
1876                 if(!th->syn)
1877                         t1->ack_seq=htonl(th->seq);
1878                 else
1879                         t1->ack_seq=htonl(th->seq+1);
1880                 t1->seq=0;
1881         }
1882 
             /* Clear the remaining flags inherited from the copied header; no options. */
1883         t1->syn = 0;
1884         t1->urg = 0;
1885         t1->fin = 0;
1886         t1->psh = 0;
1887         t1->doff = sizeof(*t1)/4;
1888         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1889         prot->queue_xmit(NULL, ndev, buff, 1);
1890         tcp_statistics.TcpOutSegs++;
1891 }
1892 
1893 
1894 /*
1895  *      Look for tcp options. Parses everything but only knows about MSS.
1896  *      This routine is always called with the packet containing the SYN.
1897  *      However it may also be called with the ack to the SYN.  So you
1898  *      can't assume this is always the SYN.  It's always called after
1899  *      we have set up sk->mtu to our own MTU.
      *
      *      sk - socket whose mtu and mss are updated
      *      th - TCP header of the received segment; the options are the
      *           bytes between the fixed header and th->doff*4
      *
      *      EOL ends the list and NOP is a single byte.  Every other option
      *      is kind, length, data, where the length counts the kind and
      *      length bytes too.  A length below 2, or one that runs past the
      *      end of the header, stops the scan.  However the scan ends, a
      *      SYN that carried no MSS option gets the default of 536 and
      *      sk->mss is recomputed from sk->mtu.
1900  */
1901  
1902 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1903 {
1904         unsigned char *ptr;
1905         int length=(th->doff*4)-sizeof(struct tcphdr);
1906         int mss_seen = 0;
1907     
1908         ptr = (unsigned char *)(th + 1);
1909   
1910         while(length>0)
1911         {
1912                 int opcode=*ptr++;
1913                 int opsize;
1914 
1915                 if(opcode==TCPOPT_EOL)
1916                         break;
1917                 if(opcode==TCPOPT_NOP)
1918                 {
1919                         /* NOP is one byte and has no length field. */
1920                         length--;
1921                         continue;
1922                 }
1923                 if(length<2)    /* No room left for a length byte */
1924                         break;
1925                 opsize=*ptr++;
1926                 /* Silly or truncated option: never read past the header */
1927                 if(opsize<2 || opsize>length)
1928                         break;
1929                 switch(opcode)
1930                 {
1931                         case TCPOPT_MSS:
1932                                 if(opsize==4 && th->syn)
1933                                 {
1934                                         /* Big-endian value, read bytewise: no alignment needed */
1935                                         sk->mtu=min(sk->mtu,(ptr[0]<<8)|ptr[1]);
1936                                         mss_seen = 1;
1937                                 }
1938                                 break;
1939                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1940                 }
1941                 ptr+=opsize-2;
1942                 length-=opsize;
1943         }
1944         if (th->syn && !mss_seen)
1945                 sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1946 #ifdef CONFIG_INET_PCTCP
1947         sk->mss = min(sk->max_window >> 1, sk->mtu);
1948 #else    
1949         sk->mss = min(sk->max_window, sk->mtu);
1950 #endif  
     }
1951 /*
      *      Return the classful network mask for dst.  dst is converted
      *      with ntohl() before it is classified and the mask is converted
      *      back with htonl().  Class A and class B addresses get their own
      *      masks; every other address gets the class C mask.
      */
1952 static inline unsigned long default_mask(unsigned long dst)
     /* [previous][next][first][last][top][bottom][index][help] */
1953 {
1954         unsigned long host = ntohl(dst);
1955 
1956         if (IN_CLASSA(host))
1957                 return htonl(IN_CLASSA_NET);
1958 
1959         return htonl(IN_CLASSB(host) ? IN_CLASSB_NET : IN_CLASSC_NET);
1960 }
1961 
1962 /*
1963  *      This routine handles a connection request.
1964  *      It should make sure we haven't already responded.
1965  *      Because of the way BSD works, we have to send a syn/ack now.
1966  *      This also means it will be harder to close a socket which is
1967  *      listening.
      *
      *      sk    - the listening socket the SYN arrived on
      *      skb   - the received SYN; its TCP header is skb->h.th
      *      daddr - our address, saddr - the peer's; they are swapped when
      *              stored in the new socket
      *      opt   - only used when a reset is sent for a dead listener
      *      dev   - device used here for its pa_mask and mtu
      *
      *      A new sock is cloned from sk, put in SYN_RECV, and a SYN/ACK
      *      carrying our MSS is transmitted.  On success the SYN buffer is
      *      charged to the new socket and queued on sk->receive_queue, and
      *      sk->ack_backlog is incremented.  On any failure the SYN is
      *      freed and TcpAttemptFails is counted.
      *
      *      sk->err holds a positive errno (tcp_read() hands back -sk->err),
      *      so the failure paths below store positive values.
1968  */
1969  
1970 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1971                  unsigned long daddr, unsigned long saddr,
1972                  struct options *opt, struct device *dev)
1973 {
1974         struct sk_buff *buff;
1975         struct tcphdr *t1;
1976         unsigned char *ptr;
1977         struct sock *newsk;
1978         struct tcphdr *th;
1979         struct device *ndev=NULL;
1980         int tmp;
1981         struct rtable *rt;
1982   
1983         th = skb->h.th;
1984 
1985         /* If the socket is dead, don't accept the connection. */
1986         if (!sk->dead) 
1987         {
1988                 sk->data_ready(sk,0);
1989         }
1990         else 
1991         {
1992                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1993                 tcp_statistics.TcpAttemptFails++;
1994                 kfree_skb(skb, FREE_READ);
1995                 return;
1996         }
1997 
1998         /*
1999          * Make sure we can accept more.  This will prevent a
2000          * flurry of syns from eating up all our memory.
2001          */
2002 
2003         if (sk->ack_backlog >= sk->max_ack_backlog) 
2004         {
2005                 tcp_statistics.TcpAttemptFails++;
2006                 kfree_skb(skb, FREE_READ);
2007                 return;
2008         }
2009 
2010         /*
2011          * We need to build a new sock struct.
2012          * It is sort of bad to have a socket without an inode attached
2013          * to it, but the wake_up's will just wake up the listening socket,
2014          * and if the listening socket is destroyed before this is taken
2015          * off of the queue, this will take care of it.
2016          */
2017 
2018         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
2019         if (newsk == NULL) 
2020         {
2021                 /* just ignore the syn.  It will get retransmitted. */
2022                 tcp_statistics.TcpAttemptFails++;
2023                 kfree_skb(skb, FREE_READ);
2024                 return;
2025         }
2026 
             /*
              * Start from a byte copy of the listening socket, then reset
              * every field that must not be shared with it: queues, timers,
              * round trip state, memory accounting and sequence numbers.
              */
2027         memcpy(newsk, sk, sizeof(*newsk));
2028         skb_queue_head_init(&newsk->write_queue);
2029         skb_queue_head_init(&newsk->receive_queue);
2030         newsk->send_head = NULL;
2031         newsk->send_tail = NULL;
2032         skb_queue_head_init(&newsk->back_log);
2033         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2034         newsk->rto = TCP_TIMEOUT_INIT;
2035         newsk->mdev = 0;
2036         newsk->max_window = 0;
2037         newsk->cong_window = 1;
2038         newsk->cong_count = 0;
2039         newsk->ssthresh = 0;
2040         newsk->backoff = 0;
2041         newsk->blog = 0;
2042         newsk->intr = 0;
2043         newsk->proc = 0;
2044         newsk->done = 0;
2045         newsk->partial = NULL;
2046         newsk->pair = NULL;
2047         newsk->wmem_alloc = 0;
2048         newsk->rmem_alloc = 0;
2049         newsk->localroute = sk->localroute;
2050 
2051         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2052 
2053         newsk->err = 0;
2054         newsk->shutdown = 0;
2055         newsk->ack_backlog = 0;
2056         newsk->acked_seq = skb->h.th->seq+1;
2057         newsk->fin_seq = skb->h.th->seq;
2058         newsk->copied_seq = skb->h.th->seq;
2059         newsk->state = TCP_SYN_RECV;
2060         newsk->timeout = 0;
2061         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
2062         newsk->window_seq = newsk->write_seq;
2063         newsk->rcv_ack_seq = newsk->write_seq;
2064         newsk->urg_data = 0;
2065         newsk->retransmits = 0;
2066         newsk->destroy = 0;
2067         init_timer(&newsk->timer);
2068         newsk->timer.data = (unsigned long)newsk;
2069         newsk->timer.function = &net_timer;
2070         newsk->dummy_th.source = skb->h.th->dest;
2071         newsk->dummy_th.dest = skb->h.th->source;
2072         
2073         /*
2074          *      Swap these two, they are from our point of view. 
2075          */
2076          
2077         newsk->daddr = saddr;
2078         newsk->saddr = daddr;
2079 
2080         put_sock(newsk->num,newsk);
2081         newsk->dummy_th.res1 = 0;
2082         newsk->dummy_th.doff = 6;
2083         newsk->dummy_th.fin = 0;
2084         newsk->dummy_th.syn = 0;
2085         newsk->dummy_th.rst = 0;        
2086         newsk->dummy_th.psh = 0;
2087         newsk->dummy_th.ack = 0;
2088         newsk->dummy_th.urg = 0;
2089         newsk->dummy_th.res2 = 0;
2090         newsk->acked_seq = skb->h.th->seq + 1;
2091         newsk->copied_seq = skb->h.th->seq;
2092         newsk->socket = NULL;
2093 
2094         /*
2095          *      Grab the ttl and tos values and use them 
2096          */
2097 
2098         newsk->ip_ttl=sk->ip_ttl;
2099         newsk->ip_tos=skb->ip_hdr->tos;
2100 
2101         /*
2102          *      Pick our MTU: the user's MSS if one was set, else the
      *              route's MSS less headers, else 576 less headers for a
      *              peer off our network and MAX_WINDOW for one on it.
2103          */
2104 
2105         /*
2106          *      Note use of sk->user_mss, since user has no direct access to newsk 
2107          */
2108 
2109         rt=ip_rt_route(saddr, NULL,NULL);
2110         
2111         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2112                 newsk->window_clamp = rt->rt_window;
2113         else
2114                 newsk->window_clamp = 0;
2115                 
2116         if (sk->user_mss)
2117                 newsk->mtu = sk->user_mss;
2118         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2119                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2120         else 
2121         {
2122 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2123                 if ((saddr ^ daddr) & default_mask(saddr))
2124 #else
2125                 if ((saddr ^ daddr) & dev->pa_mask)
2126 #endif
2127                         newsk->mtu = 576 - HEADER_SIZE;
2128                 else
2129                         newsk->mtu = MAX_WINDOW;
2130         }
2131 
2132         /*
2133          *      But not bigger than device MTU 
2134          */
2135 
2136         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2137 
2138         /*
2139          *      This will min with what arrived in the packet 
2140          */
2141 
2142         tcp_options(newsk,skb->h.th);
2143 
2144         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2145         if (buff == NULL) 
2146         {
                     /*
                      * sk->err is a positive errno.
                      * NOTE(review): newsk has already been hashed by
                      * put_sock() and is still in TCP_SYN_RECV here; confirm
                      * that release_sock() on a dead socket reclaims it.
                      */
2147                 sk->err = ENOMEM;
2148                 newsk->dead = 1;
2149                 release_sock(newsk);
2150                 kfree_skb(skb, FREE_READ);
2151                 tcp_statistics.TcpAttemptFails++;
2152                 return;
2153         }
2154   
             /* TCP header plus the four bytes of the MSS option. */
2155         buff->len = sizeof(struct tcphdr)+4;
2156         buff->sk = newsk;
2157         buff->localroute = newsk->localroute;
2158 
2159         t1 =(struct tcphdr *) buff->data;
2160 
2161         /*
2162          *      Put in the IP header and routing stuff. 
2163          */
2164 
2165         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2166                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2167 
2168         /*
2169          *      Something went wrong. 
2170          */
2171 
2172         if (tmp < 0) 
2173         {
                     /* tmp is a negative error code; sk->err wants it positive. */
2174                 sk->err = -tmp;
2175                 buff->free=1;
2176                 kfree_skb(buff,FREE_WRITE);
2177                 newsk->dead = 1;
2178                 release_sock(newsk);
2179                 skb->sk = sk;
2180                 kfree_skb(skb, FREE_READ);
2181                 tcp_statistics.TcpAttemptFails++;
2182                 return;
2183         }
2184 
2185         buff->len += tmp;
2186         t1 =(struct tcphdr *)((char *)t1 +tmp);
2187   
             /*
              * The SYN/ACK header starts as a copy of the incoming SYN and
              * is then overridden field by field.  The SYN takes one of our
              * sequence numbers, hence write_seq++.
              */
2188         memcpy(t1, skb->h.th, sizeof(*t1));
2189         buff->h.seq = newsk->write_seq;
2190         /*
2191          *      Swap the send and the receive. 
2192          */
2193         t1->dest = skb->h.th->source;
2194         t1->source = newsk->dummy_th.source;
2195         t1->seq = ntohl(newsk->write_seq++);
2196         t1->ack = 1;
2197         newsk->window = tcp_select_window(newsk);
2198         newsk->sent_seq = newsk->write_seq;
2199         t1->window = ntohs(newsk->window);
2200         t1->res1 = 0;
2201         t1->res2 = 0;
2202         t1->rst = 0;
2203         t1->urg = 0;
2204         t1->psh = 0;
2205         t1->syn = 1;
2206         t1->ack_seq = ntohl(skb->h.th->seq+1);
             /* One extra 32-bit word of options: kind 2 (MSS), length 4, value big-endian. */
2207         t1->doff = sizeof(*t1)/4+1;
2208         ptr =(unsigned char *)(t1+1);
2209         ptr[0] = 2;
2210         ptr[1] = 4;
2211         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2212         ptr[3] =(newsk->mtu) & 0xff;
2213 
2214         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2215         newsk->prot->queue_xmit(newsk, ndev, buff, 0);
2216 
2217         reset_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
2218         skb->sk = newsk;
2219 
2220         /*
2221          *      Charge the sock_buff to newsk. 
2222          */
2223          
2224         sk->rmem_alloc -= skb->mem_len;
2225         newsk->rmem_alloc += skb->mem_len;
2226         
             /* The SYN, now owned by newsk, waits on the listener's receive queue. */
2227         skb_queue_tail(&sk->receive_queue,skb);
2228         sk->ack_backlog++;
2229         release_sock(newsk);
2230         tcp_statistics.TcpOutSegs++;
2231 }
2232 
2233 
2234 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2235 {
2236         struct sk_buff *buff;
2237         struct tcphdr *t1, *th;
2238         struct proto *prot;
2239         struct device *dev=NULL;
2240         int tmp;
2241 
2242         /*
2243          * We need to grab some memory, and put together a FIN, 
2244          * and then put it into the queue to be sent.
2245          */
2246         sk->inuse = 1;
2247         sk->keepopen = 1;
2248         sk->shutdown = SHUTDOWN_MASK;
2249 
2250         if (!sk->dead) 
2251                 sk->state_change(sk);
2252 
2253         if (timeout == 0) 
2254         {
2255                 /*
2256                  *  We need to flush the recv. buffs.  We do this only on the
2257                  *  descriptor close, not protocol-sourced closes, because the
2258                  *  reader process may not have drained the data yet!
2259                  */
2260 
2261                 if (skb_peek(&sk->receive_queue) != NULL) 
2262                 {
2263                         struct sk_buff *skb;
2264                         if(sk->debug)
2265                                 printk("Clean rcv queue\n");
2266                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2267                                 kfree_skb(skb, FREE_READ);
2268                         if(sk->debug)
2269                                 printk("Cleaned.\n");
2270                 }
2271         }
2272 
2273         /*
2274          *      Get rid off any half-completed packets. 
2275          */
2276          
2277         if (sk->partial) 
2278         {
2279                 tcp_send_partial(sk);
2280         }
2281 
2282         switch(sk->state) 
2283         {
2284                 case TCP_FIN_WAIT1:
2285                 case TCP_FIN_WAIT2:
2286                 case TCP_CLOSING:
2287                         /*
2288                          * These states occur when we have already closed out
2289                          * our end.  If there is no timeout, we do not do
2290                          * anything.  We may still be in the middle of sending
2291                          * the remainder of our buffer, for example...
2292                          * resetting the timer would be inappropriate.
2293                          *
2294                          * XXX if retransmit count reaches limit, is tcp_close()
2295                          * called with timeout == 1 ? if not, we need to fix that.
2296                          */
2297                         if (!timeout) {
2298                                 int timer_active;
2299 
2300                                 timer_active = del_timer(&sk->timer);
2301                                 if (timer_active)
2302                                         add_timer(&sk->timer);
2303                                 else
2304                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2305                         }
2306                         if (timeout) 
2307                                 tcp_time_wait(sk);
2308                         release_sock(sk);
2309                         return; /* break causes a double release - messy */
2310                 case TCP_TIME_WAIT:
2311                 case TCP_LAST_ACK:
2312                         /*
2313                          * A timeout from these states terminates the TCB.
2314                          */
2315                         if (timeout) 
2316                         {
2317                                 tcp_set_state(sk,TCP_CLOSE);
2318                         }
2319                         release_sock(sk);
2320                         return;
2321                 case TCP_LISTEN:
2322                         tcp_set_state(sk,TCP_CLOSE);
2323                         release_sock(sk);
2324                         return;
2325                 case TCP_CLOSE:
2326                         release_sock(sk);
2327                         return;
2328                 case TCP_CLOSE_WAIT:
2329                 case TCP_ESTABLISHED:
2330                 case TCP_SYN_SENT:
2331                 case TCP_SYN_RECV:
2332                         prot =(struct proto *)sk->prot;
2333                         th =(struct tcphdr *)&sk->dummy_th;
2334                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2335                         if (buff == NULL) 
2336                         {
2337                                 /* This will force it to try again later. */
2338                                 /* Or it would have if someone released the socket
2339                                    first. Anyway it might work now */
2340                                 release_sock(sk);
2341                                 if (sk->state != TCP_CLOSE_WAIT)
2342                                         tcp_set_state(sk,TCP_ESTABLISHED);
2343                                 reset_timer(sk, TIME_CLOSE, 100);
2344                                 return;
2345                         }
2346                         buff->sk = sk;
2347                         buff->free = 1;
2348                         buff->len = sizeof(*t1);
2349                         buff->localroute = sk->localroute;
2350                         t1 =(struct tcphdr *) buff->data;
2351         
2352                         /*
2353                          *      Put in the IP header and routing stuff. 
2354                          */
2355                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2356                                          IPPROTO_TCP, sk->opt,
2357                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2358                         if (tmp < 0) 
2359                         {
2360                                 sk->write_seq++;        /* Very important 8) */
2361                                 kfree_skb(buff,FREE_WRITE);
2362 
2363                                 /*
2364                                  * Enter FIN_WAIT1 to await completion of
2365                                  * written out data and ACK to our FIN.
2366                                  */
2367 
2368                                 if(sk->state==TCP_ESTABLISHED)
2369                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2370                                 else
2371                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2372                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2373                                 if(timeout)
2374                                         tcp_time_wait(sk);
2375 
2376                                 release_sock(sk);
2377                                 return;
2378                         }
2379 
2380                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2381                         buff->len += tmp;
2382                         buff->dev = dev;
2383                         memcpy(t1, th, sizeof(*t1));
2384                         t1->seq = ntohl(sk->write_seq);
2385                         sk->write_seq++;
2386                         buff->h.seq = sk->write_seq;
2387                         t1->ack = 1;
2388         
2389                         /* 
2390                          *      Ack everything immediately from now on. 
2391                          */
2392 
2393                         sk->delay_acks = 0;
2394                         t1->ack_seq = ntohl(sk->acked_seq);
2395                         t1->window = ntohs(sk->window=tcp_select_window(sk));
2396                         t1->fin = 1;
2397                         t1->rst = 0;
2398                         t1->doff = sizeof(*t1)/4;
2399                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2400 
2401                         tcp_statistics.TcpOutSegs++;
2402         
2403                         if (skb_peek(&sk->write_queue) == NULL) 
2404                         {
2405                                 sk->sent_seq = sk->write_seq;
2406                                 prot->queue_xmit(sk, dev, buff, 0);
2407                         } 
2408                         else 
2409                         {
2410                                 reset_timer(sk, TIME_WRITE, sk->rto);
2411                                 if (buff->next != NULL) 
2412                                 {
2413                                         printk("tcp_close: next != NULL\n");
2414                                         skb_unlink(buff);
2415                                 }
2416                                 skb_queue_tail(&sk->write_queue, buff);
2417                         }
2418 
2419                         /*
2420                          * If established (normal close), enter FIN_WAIT1.
2421                          * If in CLOSE_WAIT, enter LAST_ACK
2422                          * If in CLOSING, remain in CLOSING
2423                          * otherwise enter FIN_WAIT2
2424                          */
2425 
2426                         if (sk->state == TCP_ESTABLISHED)
2427                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2428                         else if (sk->state == TCP_CLOSE_WAIT)
2429                                 tcp_set_state(sk,TCP_LAST_ACK);
2430                         else if (sk->state != TCP_CLOSING)
2431                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2432         }
2433         release_sock(sk);
2434 }
2435 
2436 
2437 /*
2438  * This routine takes stuff off of the write queue,
2439  * and puts it in the xmit queue.
2440  */
2441 static void
2442 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2443 {
2444         struct sk_buff *skb;
2445 
2446         /*
2447          *      The bytes will have to remain here. In time closedown will
2448          *      empty the write queue and all will be happy 
2449          */
2450 
2451         if(sk->zapped)
2452                 return;
2453 
2454         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2455                 before(skb->h.seq, sk->window_seq + 1) &&
2456                 (sk->retransmits == 0 ||
2457                  sk->timeout != TIME_WRITE ||
2458                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2459                 && sk->packets_out < sk->cong_window) 
2460         {
2461                 IS_SKB(skb);
2462                 skb_unlink(skb);
2463                 /* See if we really need to send the packet. */
2464                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2465                 {
2466                         sk->retransmits = 0;
2467                         kfree_skb(skb, FREE_WRITE);
2468                         if (!sk->dead) 
2469                                 sk->write_space(sk);
2470                 } 
2471                 else
2472                 {
2473                         struct tcphdr *th;
2474                         struct iphdr *iph;
2475                         int size;
2476 /*
2477  * put in the ack seq and window at this point rather than earlier,
2478  * in order to keep them monotonic.  We really want to avoid taking
2479  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2480  * Ack and window will in general have changed since this packet was put
2481  * on the write queue.
2482  */
2483                         iph = (struct iphdr *)(skb->data +
2484                                                skb->dev->hard_header_len);
2485                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2486                         size = skb->len - (((unsigned char *) th) - skb->data);
2487                         
2488                         th->ack_seq = ntohl(sk->acked_seq);
2489                         th->window = ntohs(tcp_select_window(sk));
2490 
2491                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2492 
2493                         sk->sent_seq = skb->h.seq;
2494                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2495                 }
2496         }
2497 }
2498 
2499 
2500 /*
2501  *      This routine deals with incoming acks, but not outgoing ones.
 *
 *      sk    - socket the segment arrived for.
 *      th    - TCP header of the incoming segment; ack_seq, window and
 *              doff are read from it.
 *      saddr - not referenced anywhere in this function.
 *      len   - compared against th->doff*4; a difference is taken to
 *              mean the segment carries data (presumably len is the
 *              TCP header plus payload length - confirm with caller).
 *
 *      Returns 0 when the ack is rejected: it acknowledges data beyond
 *      sk->sent_seq, or it is older than sk->rcv_ack_seq while the
 *      socket is in neither ESTABLISHED nor CLOSE_WAIT.  Returns 1 in
 *      every other case, including a zapped socket.
 *
 *      In order, the function: tracks the peer's largest window and
 *      mss, copes with a shrunken window, grows the congestion window,
 *      releases acknowledged segments from the retransmit list (with
 *      an RTT sample where permitted), pushes more queued data or
 *      selects the next timer, advances the LAST_ACK / FIN_WAIT1 /
 *      CLOSING states, and finally decides whether to retransmit.
2502  */
2503 
2504 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
2505 {
2506         unsigned long ack;
2507         int flag = 0;
2508 
2509         /* 
          * Meaning of the bits collected in 'flag':
2510          * 1 - there was data in packet as well as ack or new data is sent or 
2511          *     in shutdown state
2512          * 2 - data from retransmit queue was acked and removed
2513          * 4 - window shrunk or data from retransmit queue was acked and removed
2514          */
2515 
2516         if(sk->zapped)
2517                 return(1);      /* Dead, can't ack any more so why bother */
2518 
2519         ack = ntohl(th->ack_seq);

             /*
              * Remember the largest window the peer has advertised so far
              * and re-derive mss from it, never exceeding mtu.  With
              * CONFIG_INET_PCTCP only half of that window is used.
              */
2520         if (ntohs(th->window) > sk->max_window) 
2521         {
2522                 sk->max_window = ntohs(th->window);
2523 #ifdef CONFIG_INET_PCTCP
2524                 sk->mss = min(sk->max_window>>1, sk->mtu);
2525 #else
2526                 sk->mss = min(sk->max_window, sk->mtu);
2527 #endif  
2528         }
2529 
             /* An ack arriving while the keepalive timer is pending clears the retransmit count. */
2530         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2531                 sk->retransmits = 0;
2532 
             /*
              * The ack lies outside rcv_ack_seq..sent_seq: it either
              * acknowledges something we have not sent, or it is older
              * than the newest ack already recorded.
              */
2533         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2534         {
2535                 if(sk->debug)
2536                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2537                         
2538                 /*
2539                  *      Keepalive processing.
2540                  */
2541                  
                     /*
                      * Acks for unsent data, and old acks outside
                      * ESTABLISHED/CLOSE_WAIT, are reported to the caller as 0.
                      */
2542                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2543                 {
2544                         return(0);
2545                 }
                     /*
                      * An old ack on a keepopen socket restarts the keepalive
                      * timer, but only if that is the timer currently pending.
                      */
2546                 if (sk->keepopen) 
2547                 {
2548                         if(sk->timeout==TIME_KEEPOPEN)
2549                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2550                 }
2551                 return(1);
2552         }
2553 
             /* Length differs from the TCP header length: note it in flag bit 1. */
2554         if (len != th->doff*4) 
2555                 flag |= 1;
2556 
2557         /* See if our window has been shrunk. */
2558 
2559         if (after(sk->window_seq, ack+ntohs(th->window))) 
2560         {
2561                 /*
2562                  * We may need to move packets from the send queue
2563                  * to the write queue, if the window has been shrunk on us.
2564                  * The RFC says you are not allowed to shrink your window
2565                  * like this, but if the other end does, you must be able
2566                  * to deal with it.
2567                  */
2568                 struct sk_buff *skb;
2569                 struct sk_buff *skb2;
2570                 struct sk_buff *wskb = NULL;
2571         
                     /*
                      * Detach the whole retransmit list (chained through link3)
                      * and rebuild it below from the segments that still fit.
                      */
2572                 skb2 = sk->send_head;
2573                 sk->send_head = NULL;
2574                 sk->send_tail = NULL;
2575         
2576                 flag |= 4;
2577         
2578                 sk->window_seq = ack + ntohs(th->window);
                     /* The relinking below runs between cli() and sti(). */
2579                 cli();
2580                 while (skb2 != NULL) 
2581                 {
2582                         skb = skb2;
2583                         skb2 = skb->link3;
2584                         skb->link3 = NULL;
                             /* Segment lies after the new window edge: give it back to the write queue. */
2585                         if (after(skb->h.seq, sk->window_seq)) 
2586                         {
2587                                 if (sk->packets_out > 0) 
2588                                         sk->packets_out--;
2589                                 /* We may need to remove this from the dev send list. */
2590                                 if (skb->next != NULL) 
2591                                 {
2592                                         skb_unlink(skb);                                
2593                                 }
2594                                 /* Now add it to the write_queue. */
                                     /*
                                      * The first returned segment goes to the head of
                                      * write_queue; each later one is appended relative
                                      * to the previously returned one (wskb).
                                      */
2595                                 if (wskb == NULL)
2596                                         skb_queue_head(&sk->write_queue,skb);
2597                                 else
2598                                         skb_append(wskb,skb);
2599                                 wskb = skb;
2600                         } 
2601                         else 
2602                         {
                                     /* Still inside the window: re-chain onto the retransmit list. */
2603                                 if (sk->send_head == NULL) 
2604                                 {
2605                                         sk->send_head = skb;
2606                                         sk->send_tail = skb;
2607                                 }
2608                                 else
2609                                 {
2610                                         sk->send_tail->link3 = skb;
2611                                         sk->send_tail = skb;
2612                                 }
2613                                 skb->link3 = NULL;
2614                         }
2615                 }
2616                 sti();
2617         }
2618 
             /*
              * If either end of the retransmit list is NULL, treat the list
              * as empty: clear both ends and the outstanding packet count.
              */
2619         if (sk->send_tail == NULL || sk->send_head == NULL) 
2620         {
2621                 sk->send_head = NULL;
2622                 sk->send_tail = NULL;
2623                 sk->packets_out= 0;
2624         }
2625 
             /* Record the window edge implied by this ack. */
2626         sk->window_seq = ack + ntohs(th->window);
2627 
2628         /* We don't want too many packets out there. */
             /*
              * The congestion window grows only while the write timer is the
              * pending one, the window is below 2048, and this ack is newer
              * than rcv_ack_seq.
              */
2629         if (sk->timeout == TIME_WRITE && 
2630                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2631         {
2632 /* 
2633  * This is Jacobson's slow start and congestion avoidance. 
2634  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2635  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2636  * counter and increment it once every cwnd times.  It's possible
2637  * that this should be done only if sk->retransmits == 0.  I'm
2638  * interpreting "new data is acked" as including data that has
2639  * been retransmitted but is just now being acked.
2640  */
2641                 if (sk->cong_window < sk->ssthresh)  
2642                   /* 
2643                    *    In "safe" area, increase
2644                    */
2645                         sk->cong_window++;
2646                 else 
2647                 {
2648                   /*
2649                    *    In dangerous area, increase slowly.  In theory this is
2650                    *    sk->cong_window += 1 / sk->cong_window
2651                    */
2652                         if (sk->cong_count >= sk->cong_window) 
2653                         {
2654                                 sk->cong_window++;
2655                                 sk->cong_count = 0;
2656                         }
2657                         else 
2658                                 sk->cong_count++;
2659                 }
2660         }
2661 
             /*
              * From here on rcv_ack_seq holds this ack.  The comparisons
              * against rcv_ack_seq above used the previous value.
              */
2662         sk->rcv_ack_seq = ack;
2663 
2664         /*
2665          * if this ack opens up a zero window, clear backoff.  It was
2666          * being used to time the probes, and is probably far higher than
2667          * it needs to be for normal retransmission.
2668          */
2669 
2670         if (sk->timeout == TIME_PROBE0) 
2671         {
                     /* True when window_seq is not before the h.seq of the first queued segment. */
2672                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2673                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2674                 {
2675                         sk->retransmits = 0;
2676                         sk->backoff = 0;
2677                   /*
2678                    *    Recompute rto from rtt.  this eliminates any backoff.
2679                    */
2680 
                             /* Same formula and 20 .. 120*HZ clamp as the RTT update further down. */
2681                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2682                         if (sk->rto > 120*HZ)
2683                                 sk->rto = 120*HZ;
2684                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2685                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2686                                                    .2 of a second is going to need huge windows (SIGH) */
2687                                 sk->rto = 20;
2688                 }
2689         }
2690 
2691   /* 
2692    *    See if we can take anything off of the retransmit queue.
2693    */
2694    
2695         while(sk->send_head != NULL) 
2696         {
2697                 /* Check for a bug. */
2698                 if (sk->send_head->link3 &&
2699                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2700                         printk("INET: tcp.c: *** bug send_list out of order.\n");
                     /* Head segment's h.seq is before ack+1: this ack covers it. */
2701                 if (before(sk->send_head->h.seq, ack+1)) 
2702                 {
2703                         struct sk_buff *oskb;   
2704                         if (sk->retransmits) 
2705                         {       
2706                                 /*
2707                                  *      We were retransmitting.  don't count this in RTT est 
2708                                  */
2709                                 flag |= 2;
2710 
2711                                 /*
2712                                  * even though we've gotten an ack, we're still
2713                                  * retransmitting as long as we're sending from
2714                                  * the retransmit queue.  Keeping retransmits non-zero
2715                                  * prevents us from getting new data interspersed with
2716                                  * retransmissions.
2717                                  */
2718 
2719                                 if (sk->send_head->link3)
2720                                         sk->retransmits = 1;
2721                                 else
2722                                         sk->retransmits = 0;
2723                         }
2724                         /*
2725                          * Note that we only reset backoff and rto in the
2726                          * rtt recomputation code.  And that doesn't happen
2727                          * if there were retransmissions in effect.  So the
2728                          * first new packet after the retransmissions is
2729                          * sent with the backoff still in effect.  Not until
2730                          * we get an ack from a non-retransmitted packet do
2731                          * we reset the backoff and rto.  This allows us to deal
2732                          * with a situation where the network delay has increased
2733                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2734                          */
2735 
2736                         /*
2737                          *      We have one less packet out there. 
2738                          */
2739                          
2740                         if (sk->packets_out > 0) 
2741                                 sk->packets_out --;
2742                         /* 
2743                          *      Wake up the process, it can probably write more. 
2744                          */
2745                         if (!sk->dead) 
2746                                 sk->write_space(sk);
2747                         oskb = sk->send_head;
2748 
                             /*
                              * flag&2 is set above when retransmitting and below
                              * after the first segment is removed, so at most one
                              * RTT sample is taken per call, and none while
                              * retransmitting.
                              */
2749                         if (!(flag&2)) 
2750                         {
2751                                 long m;
2752         
2753                                 /*
2754                                  *      The following amusing code comes from Jacobson's
2755                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2756                                  *      are scaled versions of rtt and mean deviation.
2757                                  *      This is designed to be as fast as possible 
2758                                  *      m stands for "measurement".
2759                                  */
2760         
2761                                 m = jiffies - oskb->when;  /* RTT */
2762                                 if(m<=0)
2763                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2764                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2765                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2766                                 if (m < 0)
2767                                         m = -m;         /* m is now abs(error) */
2768                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2769                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2770         
2771                                 /*
2772                                  *      Now update timeout.  Note that this removes any backoff.
2773                                  */
2774                          
2775                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2776                                 if (sk->rto > 120*HZ)
2777                                         sk->rto = 120*HZ;
2778                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2779                                         sk->rto = 20;
2780                                 sk->backoff = 0;
2781                         }
2782                         flag |= (2|4);
                             /* Unlink the head of the retransmit list between cli() and sti(). */
2783                         cli();
                             /* send_head is read again here, now inside the cli() section. */
2784                         oskb = sk->send_head;
2785                         IS_SKB(oskb);
2786                         sk->send_head = oskb->link3;
2787                         if (sk->send_head == NULL) 
2788                         {
2789                                 sk->send_tail = NULL;
2790                         }
2791 
2792                 /*
2793                  *      We may need to remove this from the dev send list. 
2794                  */
2795 
2796                         if (oskb->next)
2797                                 skb_unlink(oskb);
2798                         sti();
2799                         kfree_skb(oskb, FREE_WRITE); /* write. */
                             /* Second write_space() call for this segment, now that the buffer is freed. */
2800                         if (!sk->dead) 
2801                                 sk->write_space(sk);
2802                 }
2803                 else
2804                 {
                             /* First segment not covered by this ack: stop scanning. */
2805                         break;
2806                 }
2807         }
2808 
2809         /*
2810          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2811          * returns non-NULL, we completely ignore the timer stuff in the else
2812          * clause.  We ought to organize the code so that else clause can
2813          * (should) be executed regardless, possibly moving the PROBE timer
2814          * reset over.  The skb_peek() thing should only move stuff to the
2815          * write queue, NOT also manage the timer functions.
2816          */
2817 
2818         /*
2819          * Maybe we can take some stuff off of the write queue,
2820          * and put it onto the xmit queue.
2821          */
2822         if (skb_peek(&sk->write_queue) != NULL) 
2823         {
                     /*
                      * The first queued segment fits the window, we are not held
                      * back by a TIME_WRITE retransmission, and the congestion
                      * window has room: push data out.
                      */
2824                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2825                         (sk->retransmits == 0 || 
2826                          sk->timeout != TIME_WRITE ||
2827                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2828                         && sk->packets_out < sk->cong_window) 
2829                 {
2830                         flag |= 1;
2831                         tcp_write_xmit(sk);
2832                 }
                     /*
                      * Otherwise, if the window does not reach the first queued
                      * segment, nothing is awaiting an ack, there is no ack
                      * backlog and we are not in TIME_WAIT, arm the TIME_PROBE0
                      * timer with the current rto.
                      */
2833                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2834                         sk->send_head == NULL &&
2835                         sk->ack_backlog == 0 &&
2836                         sk->state != TCP_TIME_WAIT) 
2837                 {
2838                         reset_timer(sk, TIME_PROBE0, sk->rto);
2839                 }               
2840         }
2841         else
2842         {
2843                 /*
2844                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2845                  * from TCP_CLOSE we don't do anything
2846                  *
2847                  * from anything else, if there is write data (or fin) pending,
2848                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2849                  * a KEEPALIVE timeout, else we delete the timer.
2850                  *
2851                  * We do not set flag for nominal write data, otherwise we may
2852                  * force a state where we start to write itsy bitsy tidbits
2853                  * of data.
2854                  */
2855 
2856                 switch(sk->state) {
2857                 case TCP_TIME_WAIT:
2858                         /*
2859                          * keep us in TIME_WAIT until we stop getting packets,
2860                          * reset the timeout.
2861                          */
2862                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2863                         break;
2864                 case TCP_CLOSE:
2865                         /*
2866                          * don't touch the timer.
2867                          */
2868                         break;
2869                 default:
2870                         /*
2871                          * must check send_head, write_queue, and ack_backlog
2872                          * to determine which timeout to use.
2873                          */
2874                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2875                                 reset_timer(sk, TIME_WRITE, sk->rto);
2876                         } else if (sk->keepopen) {
2877                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2878                         } else {
2879                                 delete_timer(sk);
2880                         }
2881                         break;
2882                 }
                     /* Alternative timer logic; compiled only when NOTDEF is defined. */
2883 #ifdef NOTDEF
2884                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2885                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2886                 {
2887                         if (!sk->dead)
2888                                 sk->write_space(sk);
2889                         if (sk->keepopen) {
2890                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2891                         } else {
2892                                 delete_timer(sk);
2893                         }
2894                 }
2895                 else
2896                 {
2897                         if (sk->state != (unsigned char) sk->keepopen) 
2898                         {
2899                                 reset_timer(sk, TIME_WRITE, sk->rto);
2900                         }
2901                         if (sk->state == TCP_TIME_WAIT) 
2902                         {
2903                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2904                         }       
2905                 }
2906 #endif
2907         }
2908 
             /*
              * Nothing outstanding, nothing queued and nothing awaiting an
              * ack, but a partial segment is being held: send it now.
              */
2909         if (sk->packets_out == 0 && sk->partial != NULL &&
2910                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2911         {
2912                 flag |= 1;
2913                 tcp_send_partial(sk);
2914         }
2915 
2916         /*
2917          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2918          * we are now waiting for an acknowledge to our FIN.  The other end is
2919          * already in TIME_WAIT.
2920          *
2921          * Move to TCP_CLOSE on success.
2922          */
2923 
2924         if (sk->state == TCP_LAST_ACK) 
2925         {
                     /* state_change() is called on every ack in this state, not only when the state moves. */
2926                 if (!sk->dead)
2927                         sk->state_change(sk);
2928                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2929                 {
2930                         flag |= 1;
2931                         tcp_set_state(sk,TCP_CLOSE);
2932                         sk->shutdown = SHUTDOWN_MASK;
2933                 }
2934         }
2935 
2936         /*
2937          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2938          *
2939          * Move to FIN_WAIT2 to await a FIN from the other end. Set
2940          * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
2941          */
2942 
2943         if (sk->state == TCP_FIN_WAIT1) 
2944         {
2945 
2946                 if (!sk->dead) 
2947                         sk->state_change(sk);
                     /* Everything written, including the FIN's sequence slot, is acknowledged. */
2948                 if (sk->rcv_ack_seq == sk->write_seq) 
2949                 {
2950                         flag |= 1;
2951                         sk->shutdown |= SEND_SHUTDOWN;
2952                         tcp_set_state(sk,TCP_FIN_WAIT2);
2953                 }
2954         }
2955 
2956         /*
2957          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2958          *
2959          *      Move to TIME_WAIT
2960          */
2961 
2962         if (sk->state == TCP_CLOSING) 
2963         {
2964 
2965                 if (!sk->dead) 
2966                         sk->state_change(sk);
2967                 if (sk->rcv_ack_seq == sk->write_seq) 
2968                 {
2969                         flag |= 1;
2970                         tcp_time_wait(sk);
2971                 }
2972         }
2973 
2974         /*
2975          * I make no guarantees about the first clause in the following
2976          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2977          * what conditions "!flag" would be true.  However I think the rest
2978          * of the conditions would prevent that from causing any
2979          * unnecessary retransmission. 
2980          *   Clearly if the first packet has expired it should be 
2981          * retransmitted.  The other alternative, "flag&2 && retransmits", is
2982          * harder to explain:  You have to look carefully at how and when the
2983          * timer is set and with what timeout.  The most recent transmission always
2984          * sets the timer.  So in general if the most recent thing has timed
2985          * out, everything before it has as well.  So we want to go ahead and
2986          * retransmit some more.  If we didn't explicitly test for this
2987          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2988          * would not be true.  If you look at the pattern of timing, you can
2989          * show that rto is increased fast enough that the next packet would
2990          * almost never be retransmitted immediately.  Then you'd end up
2991          * waiting for a timeout to send each packet on the retransmission
2992          * queue.  With my implementation of the Karn sampling algorithm,
2993          * the timeout would double each time.  The net result is that it would
2994          * take a hideous amount of time to recover from a single dropped packet.
2995          * It's possible that there should also be a test for TIME_WRITE, but
2996          * I think as long as "send_head != NULL" and "retransmit" is on, we've
2997          * got to be in real retransmission mode.
2998          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2999          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
3000          * As long as no further losses occur, this seems reasonable.
3001          */
3002         
             /*
              * NOTE(review): "when + rto < jiffies" is a plain comparison; it
              * does not look safe across a jiffies wrap - confirm field types.
              */
3003         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3004                (((flag&2) && sk->retransmits) ||
3005                (sk->send_head->when + sk->rto < jiffies))) 
3006         {
3007                 ip_do_retransmit(sk, 1);
3008                 reset_timer(sk, TIME_WRITE, sk->rto);
3009         }
3010 
3011         return(1);
3012 }
3013 
3014 
3015 /*
3016  *      This routine handles the data.  If there is room in the buffer,
3017  *      it will have already been moved into it.  If there is no
3018  *      room, then we will just have to discard the packet.
3019  */
3020 
3021 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
3022          unsigned long saddr, unsigned short len)
3023 {
3024         struct sk_buff *skb1, *skb2;
3025         struct tcphdr *th;
3026         int dup_dumped=0;
3027         unsigned long new_seq;
3028 
3029         th = skb->h.th;
3030         skb->len = len -(th->doff*4);
3031 
3032         /* The bytes in the receive read/assembly queue has increased. Needed for the
3033            low memory discard algorithm */
3034            
3035         sk->bytes_rcv += skb->len;
3036         
3037         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3038         {
3039                 /* 
3040                  *      Don't want to keep passing ack's back and forth. 
3041                  *      (someone sent us dataless, boring frame)
3042                  */
3043                 if (!th->ack)
3044                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3045                 kfree_skb(skb, FREE_READ);
3046                 return(0);
3047         }
3048         
3049         /*
3050          *      We no longer have anyone receiving data on this connection.
3051          */
3052 
3053         if(sk->shutdown & RCV_SHUTDOWN)
3054         {
3055                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3056                 
3057                 if(after(new_seq,sk->acked_seq+1))      /* If the right edge of this frame is after the last copied byte
3058                                                            then it contains data we will never touch. We send an RST to 
3059                                                            ensure the far end knows it never got to the application */
3060                 {
3061                         sk->acked_seq = new_seq + th->fin;
3062                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3063                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3064                         tcp_statistics.TcpEstabResets++;
3065                         tcp_set_state(sk,TCP_CLOSE);
3066                         sk->err = EPIPE;
3067                         sk->shutdown = SHUTDOWN_MASK;
3068                         kfree_skb(skb, FREE_READ);
3069                         if (!sk->dead)
3070                                 sk->state_change(sk);
3071                         return(0);
3072                 }
3073         }
3074         /*
3075          *      Now we have to walk the chain, and figure out where this one
3076          *      goes into it.  This is set up so that the last packet we received
3077          *      will be the first one we look at, that way if everything comes
3078          *      in order, there will be no performance loss, and if they come
3079          *      out of order we will be able to fit things in nicely.
3080          */
3081 
3082         /* 
3083          *      This should start at the last one, and then go around forwards.
3084          */
3085 
3086         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3087         {
3088                 skb_queue_head(&sk->receive_queue,skb);
3089                 skb1= NULL;
3090         } 
3091         else
3092         {
3093                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3094                 {
3095                         if(sk->debug)
3096                         {
3097                                 printk("skb1=%p :", skb1);
3098                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3099                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3100                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3101                                                 sk->acked_seq);
3102                         }
3103                         
3104                         /*
3105                          *      Optimisation: Duplicate frame or extension of previous frame from
3106                          *      same sequence point (lost ack case).
3107                          *      The frame contains duplicate data or replaces a previous frame
3108                          *      discard the previous frame (safe as sk->inuse is set) and put
3109                          *      the new one in its place.
3110                          */
3111                          
3112                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3113                         {
3114                                 skb_append(skb1,skb);
3115                                 skb_unlink(skb1);
3116                                 kfree_skb(skb1,FREE_READ);
3117                                 dup_dumped=1;
3118                                 skb1=NULL;
3119                                 break;
3120                         }
3121                         
3122                         /*
3123                          *      Found where it fits
3124                          */
3125                          
3126                         if (after(th->seq+1, skb1->h.th->seq))
3127                         {
3128                                 skb_append(skb1,skb);
3129                                 break;
3130                         }
3131                         
3132                         /*
3133                          *      See if we've hit the start. If so insert.
3134                          */
3135                         if (skb1 == skb_peek(&sk->receive_queue))
3136                         {
3137                                 skb_queue_head(&sk->receive_queue, skb);
3138                                 break;
3139                         }
3140                 }
3141         }
3142 
3143         /*
3144          *      Figure out what the ack value for this frame is
3145          */
3146          
3147         th->ack_seq = th->seq + skb->len;
3148         if (th->syn) 
3149                 th->ack_seq++;
3150         if (th->fin)
3151                 th->ack_seq++;
3152 
3153         if (before(sk->acked_seq, sk->copied_seq)) 
3154         {
3155                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3156                 sk->acked_seq = sk->copied_seq;
3157         }
3158 
3159         /*
3160          *      Now figure out if we can ack anything.
3161          */
3162 
3163         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3164         {
3165                 if (before(th->seq, sk->acked_seq+1)) 
3166                 {
3167                         int newwindow;
3168 
3169                         if (after(th->ack_seq, sk->acked_seq)) 
3170                         {
3171                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3172                                 if (newwindow < 0)
3173                                         newwindow = 0;  
3174                                 sk->window = newwindow;
3175                                 sk->acked_seq = th->ack_seq;
3176                         }
3177                         skb->acked = 1;
3178 
3179                         /* 
3180                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3181                          */
3182 
3183                         if (skb->h.th->fin) 
3184                         {
3185                                 if (!sk->dead) 
3186                                         sk->state_change(sk);
3187                                 sk->shutdown |= RCV_SHUTDOWN;
3188                         }
3189           
3190                         for(skb2 = skb->next;
3191                             skb2 != (struct sk_buff *)&sk->receive_queue;
3192                             skb2 = skb2->next) 
3193                         {
3194                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3195                                 {
3196                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3197                                         {
3198                                                 newwindow = sk->window -
3199                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3200                                                 if (newwindow < 0)
3201                                                         newwindow = 0;  
3202                                                 sk->window = newwindow;
3203                                                 sk->acked_seq = skb2->h.th->ack_seq;
3204                                         }
3205                                         skb2->acked = 1;
3206                                         /*
3207                                          *      When we ack the fin, we turn on
3208                                          *      the RCV_SHUTDOWN flag.
3209                                          */
3210                                         if (skb2->h.th->fin) 
3211                                         {
3212                                                 sk->shutdown |= RCV_SHUTDOWN;
3213                                                 if (!sk->dead)
3214                                                         sk->state_change(sk);
3215                                         }
3216 
3217                                         /*
3218                                          *      Force an immediate ack.
3219                                          */
3220                                          
3221                                         sk->ack_backlog = sk->max_ack_backlog;
3222                                 }
3223                                 else
3224                                 {
3225                                         break;
3226                                 }
3227                         }
3228 
3229                         /*
3230                          *      This also takes care of updating the window.
3231                          *      This if statement needs to be simplified.
3232                          */
3233                         if (!sk->delay_acks ||
3234                             sk->ack_backlog >= sk->max_ack_backlog || 
3235                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3236         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3237                         }
3238                         else 
3239                         {
3240                                 sk->ack_backlog++;
3241                                 if(sk->debug)
3242                                         printk("Ack queued.\n");
3243                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3244                         }
3245                 }
3246         }
3247 
3248         /*
3249          *      If we've missed a packet, send an ack.
3250          *      Also start a timer to send another.
3251          */
3252          
3253         if (!skb->acked) 
3254         {
3255         
3256         /*
3257          *      This is important.  If we don't have much room left,
3258          *      we need to throw out a few packets so we have a good
3259          *      window.  Note that mtu is used, not mss, because mss is really
3260          *      for the send side.  He could be sending us stuff as large as mtu.
3261          */
3262                  
3263                 while (sk->prot->rspace(sk) < sk->mtu) 
3264                 {
3265                         skb1 = skb_peek(&sk->receive_queue);
3266                         if (skb1 == NULL) 
3267                         {
3268                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3269                                 break;
3270                         }
3271 
3272                         /*
3273                          *      Don't throw out something that has been acked. 
3274                          */
3275                  
3276                         if (skb1->acked) 
3277                         {
3278                                 break;
3279                         }
3280                 
3281                         skb_unlink(skb1);
3282                         kfree_skb(skb1, FREE_READ);
3283                 }
3284                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3285                 sk->ack_backlog++;
3286                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3287         }
3288         else
3289         {
3290                 /* We missed a packet.  Send an ack to try to resync things. */
3291                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3292         }
3293 
3294         /*
3295          *      Now tell the user we may have some data. 
3296          */
3297          
3298         if (!sk->dead) 
3299         {
3300                 if(sk->debug)
3301                         printk("Data wakeup.\n");
3302                 sk->data_ready(sk,0);
3303         } 
3304         return(0);
3305 }
3306 
3307 
3308 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3309 {
3310         unsigned long ptr = ntohs(th->urg_ptr);
3311 
3312         if (ptr)
3313                 ptr--;
3314         ptr += th->seq;
3315 
3316         /* ignore urgent data that we've already seen and read */
3317         if (after(sk->copied_seq+1, ptr))
3318                 return;
3319 
3320         /* do we already have a newer (or duplicate) urgent pointer? */
3321         if (sk->urg_data && !after(ptr, sk->urg_seq))
3322                 return;
3323 
3324         /* tell the world about our new urgent pointer */
3325         if (sk->proc != 0) {
3326                 if (sk->proc > 0) {
3327                         kill_proc(sk->proc, SIGURG, 1);
3328                 } else {
3329                         kill_pg(-sk->proc, SIGURG, 1);
3330                 }
3331         }
3332         sk->urg_data = URG_NOTYET;
3333         sk->urg_seq = ptr;
3334 }
3335 
3336 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3337         unsigned long saddr, unsigned long len)
3338 {
3339         unsigned long ptr;
3340 
3341         /* check if we get a new urgent pointer */
3342         if (th->urg)
3343                 tcp_check_urg(sk,th);
3344 
3345         /* do we wait for any urgent data? */
3346         if (sk->urg_data != URG_NOTYET)
3347                 return 0;
3348 
3349         /* is the urgent pointer pointing into this packet? */
3350         ptr = sk->urg_seq - th->seq + th->doff*4;
3351         if (ptr >= len)
3352                 return 0;
3353 
3354         /* ok, got the correct packet, update info */
3355         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3356         if (!sk->dead)
3357                 sk->data_ready(sk,0);
3358         return 0;
3359 }
3360 
3361 
3362 /*
3363  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3364  *
3365  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3366  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3367  *  TIME-WAIT)
3368  *
3369  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3370  *  close and we go into CLOSING (and later onto TIME-WAIT)
3371  *
3372  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3373  *
3374  */
3375 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3376          unsigned long saddr, struct device *dev)
3377 {
3378         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3379 
3380         if (!sk->dead) 
3381         {
3382                 sk->state_change(sk);
3383         }
3384 
3385         switch(sk->state) 
3386         {
3387                 case TCP_SYN_RECV:
3388                 case TCP_SYN_SENT:
3389                 case TCP_ESTABLISHED:
3390                         /*
3391                          * move to CLOSE_WAIT, tcp_data() already handled
3392                          * sending the ack.
3393                          */
3394                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3395                         tcp_set_state(sk,TCP_CLOSE_WAIT);
3396                         if (th->rst)
3397                                 sk->shutdown = SHUTDOWN_MASK;
3398                         break;
3399 
3400                 case TCP_CLOSE_WAIT:
3401                 case TCP_CLOSING:
3402                         /*
3403                          * received a retransmission of the FIN, do
3404                          * nothing.
3405                          */
3406                         break;
3407                 case TCP_TIME_WAIT:
3408                         /*
3409                          * received a retransmission of the FIN,
3410                          * restart the TIME_WAIT timer.
3411                          */
3412                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3413                         return(0);
3414                 case TCP_FIN_WAIT1:
3415                         /*
3416                          * This case occurs when a simultaneous close
3417                          * happens, we must ack the received FIN and
3418                          * enter the CLOSING state.
3419                          *
3420                          * XXX timeout not set properly
3421                          */
3422 
3423                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3424                         tcp_set_state(sk,TCP_CLOSING);
3425                         break;
3426                 case TCP_FIN_WAIT2:
3427                         /*
3428                          * received a FIN -- send ACK and enter TIME_WAIT
3429                          */
3430                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3431                         sk->shutdown|=SHUTDOWN_MASK;
3432                         tcp_set_state(sk,TCP_TIME_WAIT);
3433                         break;
3434                 case TCP_CLOSE:
3435                         /*
3436                          * already in CLOSE
3437                          */
3438                         break;
3439                 default:
3440                         tcp_set_state(sk,TCP_LAST_ACK);
3441         
3442                         /* Start the timers. */
3443                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3444                         return(0);
3445         }
3446         sk->ack_backlog++;
3447 
3448         return(0);
3449 }
3450 
3451 
3452 /* This will accept the next outstanding connection. */
3453 static struct sock *
3454 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3455 {
3456         struct sock *newsk;
3457         struct sk_buff *skb;
3458   
3459   /*
3460    * We need to make sure that this socket is listening,
3461    * and that it has something pending.
3462    */
3463 
3464         if (sk->state != TCP_LISTEN) 
3465         {
3466                 sk->err = EINVAL;
3467                 return(NULL); 
3468         }
3469 
3470         /* Avoid the race. */
3471         cli();
3472         sk->inuse = 1;
3473 
3474         while((skb = tcp_dequeue_established(sk)) == NULL) 
3475         {
3476                 if (flags & O_NONBLOCK) 
3477                 {
3478                         sti();
3479                         release_sock(sk);
3480                         sk->err = EAGAIN;
3481                         return(NULL);
3482                 }
3483 
3484                 release_sock(sk);
3485                 interruptible_sleep_on(sk->sleep);
3486                 if (current->signal & ~current->blocked) 
3487                 {
3488                         sti();
3489                         sk->err = ERESTARTSYS;
3490                         return(NULL);
3491                 }
3492                 sk->inuse = 1;
3493         }
3494         sti();
3495 
3496         /*
3497          *      Now all we need to do is return skb->sk. 
3498          */
3499 
3500         newsk = skb->sk;
3501 
3502         kfree_skb(skb, FREE_READ);
3503         sk->ack_backlog--;
3504         release_sock(sk);
3505         return(newsk);
3506 }
3507 
3508 
3509 /*
3510  *      This will initiate an outgoing connection. 
3511  */
3512  
3513 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3514 {
3515         struct sk_buff *buff;
3516         struct device *dev=NULL;
3517         unsigned char *ptr;
3518         int tmp;
3519         struct tcphdr *t1;
3520         struct rtable *rt;
3521 
3522         if (sk->state != TCP_CLOSE) 
3523                 return(-EISCONN);
3524 
3525         if (addr_len < 8) 
3526                 return(-EINVAL);
3527 
3528         if (usin->sin_family && usin->sin_family != AF_INET) 
3529                 return(-EAFNOSUPPORT);
3530 
3531         /*
3532          *      connect() to INADDR_ANY means loopback (BSD'ism).
3533          */
3534         
3535         if(usin->sin_addr.s_addr==INADDR_ANY)
3536                 usin->sin_addr.s_addr=ip_my_addr();
3537                   
3538         /*
3539          *      Don't want a TCP connection going to a broadcast address 
3540          */
3541 
3542         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3543         { 
3544                 return -ENETUNREACH;
3545         }
3546   
3547         /*
3548          *      Connect back to the same socket: Blows up so disallow it 
3549          */
3550 
3551         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3552                 return -EBUSY;
3553 
3554         sk->inuse = 1;
3555         sk->daddr = usin->sin_addr.s_addr;
3556         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3557         sk->window_seq = sk->write_seq;
3558         sk->rcv_ack_seq = sk->write_seq -1;
3559         sk->err = 0;
3560         sk->dummy_th.dest = usin->sin_port;
3561         release_sock(sk);
3562 
3563         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3564         if (buff == NULL) 
3565         {
3566                 return(-ENOMEM);
3567         }
3568         sk->inuse = 1;
3569         buff->len = 24;
3570         buff->sk = sk;
3571         buff->free = 1;
3572         buff->localroute = sk->localroute;
3573         
3574         t1 = (struct tcphdr *) buff->data;
3575 
3576         /*
3577          *      Put in the IP header and routing stuff. 
3578          */
3579          
3580         rt=ip_rt_route(sk->daddr, NULL, NULL);
3581         
3582 
3583         /*
3584          *      We need to build the routing stuff from the things saved in skb. 
3585          */
3586 
3587         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3588                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3589         if (tmp < 0) 
3590         {
3591                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3592                 release_sock(sk);
3593                 return(-ENETUNREACH);
3594         }
3595 
3596         buff->len += tmp;
3597         t1 = (struct tcphdr *)((char *)t1 +tmp);
3598 
3599         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3600         t1->seq = ntohl(sk->write_seq++);
3601         sk->sent_seq = sk->write_seq;
3602         buff->h.seq = sk->write_seq;
3603         t1->ack = 0;
3604         t1->window = 2;
3605         t1->res1=0;
3606         t1->res2=0;
3607         t1->rst = 0;
3608         t1->urg = 0;
3609         t1->psh = 0;
3610         t1->syn = 1;
3611         t1->urg_ptr = 0;
3612         t1->doff = 6;
3613         /* use 512 or whatever user asked for */
3614         
3615         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3616                 sk->window_clamp=rt->rt_window;
3617         else
3618                 sk->window_clamp=0;
3619 
3620         if (sk->user_mss)
3621                 sk->mtu = sk->user_mss;
3622         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3623                 sk->mtu = rt->rt_mss;
3624         else 
3625         {
3626 #ifdef CONFIG_INET_SNARL
3627                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3628 #else
3629                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3630 #endif
3631                         sk->mtu = 576 - HEADER_SIZE;
3632                 else
3633                         sk->mtu = MAX_WINDOW;
3634         }
3635         /*
3636          *      but not bigger than device MTU 
3637          */
3638 
3639         if(sk->mtu <32)
3640                 sk->mtu = 32;   /* Sanity limit */
3641                 
3642         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3643         
3644         /*
3645          *      Put in the TCP options to say MTU. 
3646          */
3647 
3648         ptr = (unsigned char *)(t1+1);
3649         ptr[0] = 2;
3650         ptr[1] = 4;
3651         ptr[2] = (sk->mtu) >> 8;
3652         ptr[3] = (sk->mtu) & 0xff;
3653         tcp_send_check(t1, sk->saddr, sk->daddr,
3654                   sizeof(struct tcphdr) + 4, sk);
3655 
3656         /*
3657          *      This must go first otherwise a really quick response will get reset. 
3658          */
3659 
3660         tcp_set_state(sk,TCP_SYN_SENT);
3661         sk->rto = TCP_TIMEOUT_INIT;
3662         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3663         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3664 
3665         sk->prot->queue_xmit(sk, dev, buff, 0);  
3666         tcp_statistics.TcpActiveOpens++;
3667         tcp_statistics.TcpOutSegs++;
3668   
3669         release_sock(sk);
3670         return(0);
3671 }
3672 
3673 
3674 /* This functions checks to see if the tcp header is actually acceptable. */
3675 static int
3676 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3677              struct options *opt, unsigned long saddr, struct device *dev)
3678 {
3679         unsigned long next_seq;
3680 
3681         next_seq = len - 4*th->doff;
3682         if (th->fin)
3683                 next_seq++;
3684         /* if we have a zero window, we can't have any data in the packet.. */
3685         if (next_seq && !sk->window)
3686                 goto ignore_it;
3687         next_seq += th->seq;
3688 
3689         /*
3690          * This isn't quite right.  sk->acked_seq could be more recent
3691          * than sk->window.  This is however close enough.  We will accept
3692          * slightly more packets than we should, but it should not cause
3693          * problems unless someone is trying to forge packets.
3694          */
3695 
3696         /* have we already seen all of this packet? */
3697         if (!after(next_seq+1, sk->acked_seq))
3698                 goto ignore_it;
3699         /* or does it start beyond the window? */
3700         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3701                 goto ignore_it;
3702 
3703         /* ok, at least part of this packet would seem interesting.. */
3704         return 1;
3705 
3706 ignore_it:
3707         if (th->rst)
3708                 return 0;
3709 
3710         /*
3711          *      Send a reset if we get something not ours and we are
3712          *      unsynchronized. Note: We don't do anything to our end. We
3713          *      are just killing the bogus remote connection then we will
3714          *      connect again and it will work (with luck).
3715          */
3716          
3717         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3718                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3719                 return 1;
3720         }
3721 
3722         /* Try to resync things. */
3723         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3724         return 0;
3725 }
3726 
3727 
3728 #ifdef TCP_FASTPATH
3729 /*
3730  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3731  *      Yes if
3732  *      a) The queue is empty
3733  *      b) The last frame on the queue has the acked flag set
3734  */
3735 
3736 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3737 {
3738         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3739         if(skb==NULL || sk->receive_queue.prev->acked)
3740                 return 1;
3741 }
3742 
3743 #endif
3744 
3745 int
3746 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3747         unsigned long daddr, unsigned short len,
3748         unsigned long saddr, int redo, struct inet_protocol * protocol)
3749 {
3750         struct tcphdr *th;
3751         struct sock *sk;
3752 
3753         if (!skb) 
3754         {
3755                 return(0);
3756         }
3757 
3758         if (!dev) 
3759         {
3760                 return(0);
3761         }
3762   
3763         tcp_statistics.TcpInSegs++;
3764   
3765         if(skb->pkt_type!=PACKET_HOST)
3766         {
3767                 kfree_skb(skb,FREE_READ);
3768                 return(0);
3769         }
3770   
3771         th = skb->h.th;
3772 
3773         /*
3774          *      Find the socket.
3775          */
3776 
3777         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3778 
3779         /*
3780          *      If this socket has got a reset its to all intents and purposes 
3781          *      really dead 
3782          */
3783          
3784         if (sk!=NULL && sk->zapped)
3785                 sk=NULL;
3786 
3787         if (!redo) 
3788         {
3789                 if (tcp_check(th, len, saddr, daddr )) 
3790                 {
3791                         skb->sk = NULL;
3792                         kfree_skb(skb,FREE_READ);
3793                         /*
3794                          * We don't release the socket because it was
3795                          * never marked in use.
3796                          */
3797                         return(0);
3798                 }
3799                 th->seq = ntohl(th->seq);
3800 
3801                 /* See if we know about the socket. */
3802                 if (sk == NULL) 
3803                 {
3804                         if (!th->rst)
3805                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3806                         skb->sk = NULL;
3807                         kfree_skb(skb, FREE_READ);
3808                         return(0);
3809                 }
3810 
3811                 skb->len = len;
3812                 skb->sk = sk;
3813                 skb->acked = 0;
3814                 skb->used = 0;
3815                 skb->free = 0;
3816                 skb->saddr = daddr;
3817                 skb->daddr = saddr;
3818         
3819                 /* We may need to add it to the backlog here. */
3820                 cli();
3821                 if (sk->inuse) 
3822                 {
3823                         skb_queue_head(&sk->back_log, skb);
3824                         sti();
3825                         return(0);
3826                 }
3827                 sk->inuse = 1;
3828                 sti();
3829         }
3830         else
3831         {
3832                 if (!sk) 
3833                 {
3834                         return(0);
3835                 }
3836         }
3837 
3838 
3839         if (!sk->prot) 
3840         {
3841                 return(0);
3842         }
3843 
3844 
3845         /*
3846          *      Charge the memory to the socket. 
3847          */
3848          
3849         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3850         {
3851                 skb->sk = NULL;
3852                 kfree_skb(skb, FREE_READ);
3853                 release_sock(sk);
3854                 return(0);
3855         }
3856 
3857         sk->rmem_alloc += skb->mem_len;
3858 
3859 #ifdef TCP_FASTPATH
3860 /*
3861  *      Incoming data stream fastpath. 
3862  *
3863  *      We try to optimise two things.
3864  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3865  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3866  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3867  *
3868  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3869  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3870  *      speed although further optimizing here is possible.
3871  */
3872  
3873         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3874         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3875         {       
3876                 /* Packets in order. Fits window */
3877                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3878                 {
3879                         /* Ack is harder */
3880                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3881                         {
3882                                 kfree_skb(skb, FREE_READ);
3883                                 release_sock(sk);
3884                                 return 0;
3885                         }
3886                         /*
3887                          *      Set up variables
3888                          */
3889                         skb->len -= (th->doff *4);
3890                         sk->bytes_rcv += skb->len;
3891                         tcp_rx_hit2++;
3892                         if(skb->len)
3893                         {
3894                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3895                                 if(sk->window >= skb->len)
3896                                         sk->window-=skb->len;                   /* We know its effect on the window */
3897                                 else
3898                                         sk->window=0;
3899                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3900                                 skb->acked=1;                           /* Guaranteed true */
3901                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3902                                         sk->bytes_rcv > sk->max_unacked)
3903                                 {
3904                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3905                                 }
3906                                 else
3907                                 {
3908                                         sk->ack_backlog++;
3909                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3910                                 }
3911                                 if(!sk->dead)
3912                                         sk->data_ready(sk,0);
3913                                 release_sock(sk);
3914                                 return 0;
3915                         }
3916                 }
3917                 /*
3918                  *      More generic case of arriving data stream in ESTABLISHED
3919                  */
3920                 tcp_rx_hit1++;
3921                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3922                 {
3923                         kfree_skb(skb, FREE_READ);
3924                         release_sock(sk);
3925                         return 0;
3926                 }
3927                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3928                 {
3929                         kfree_skb(skb, FREE_READ);
3930                         release_sock(sk);
3931                         return 0;
3932                 }
3933                 if(tcp_data(skb, sk, saddr, len))
3934                         kfree_skb(skb, FREE_READ);
3935                 release_sock(sk);
3936                 return 0;
3937         }
3938         tcp_rx_miss++;
3939 #endif  
3940 
3941         /*
3942          *      Now deal with all cases.
3943          */
3944          
3945         switch(sk->state) 
3946         {
3947         
3948                 /*
3949                  * This should close the system down if it's waiting
3950                  * for an ack that is never going to be sent.
3951                  */
3952                 case TCP_LAST_ACK:
3953                         if (th->rst) 
3954                         {
3955                                 sk->zapped=1;
3956                                 sk->err = ECONNRESET;
3957                                 tcp_set_state(sk,TCP_CLOSE);
3958                                 sk->shutdown = SHUTDOWN_MASK;
3959                                 if (!sk->dead) 
3960                                 {
3961                                         sk->state_change(sk);
3962                                 }
3963                                 kfree_skb(skb, FREE_READ);
3964                                 release_sock(sk);
3965                                 return(0);
3966                         }
3967 
3968                 case TCP_ESTABLISHED:
3969                 case TCP_CLOSE_WAIT:
3970                 case TCP_CLOSING:
3971                 case TCP_FIN_WAIT1:
3972                 case TCP_FIN_WAIT2:
3973                 case TCP_TIME_WAIT:
3974                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3975                         {
3976                                 kfree_skb(skb, FREE_READ);
3977                                 release_sock(sk);
3978                                 return(0);
3979                         }
3980 
3981                         if (th->rst) 
3982                         {
3983                                 tcp_statistics.TcpEstabResets++;
3984                                 sk->zapped=1;
3985                                 /* This means the thing should really be closed. */
3986                                 sk->err = ECONNRESET;
3987                                 if (sk->state == TCP_CLOSE_WAIT) 
3988                                 {
3989                                         sk->err = EPIPE;
3990                                 }
3991         
3992                                 /*
3993                                  * A reset with a fin just means that
3994                                  * the data was not all read.
3995                                  */
3996                                 tcp_set_state(sk,TCP_CLOSE);
3997                                 sk->shutdown = SHUTDOWN_MASK;
3998                                 if (!sk->dead) 
3999                                 {
4000                                         sk->state_change(sk);
4001                                 }
4002                                 kfree_skb(skb, FREE_READ);
4003                                 release_sock(sk);
4004                                 return(0);
4005                         }
4006                         if (th->syn) 
4007                         {
4008                                 tcp_statistics.TcpEstabResets++;
4009                                 sk->err = ECONNRESET;
4010                                 tcp_set_state(sk,TCP_CLOSE);
4011                                 sk->shutdown = SHUTDOWN_MASK;
4012                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4013                                 if (!sk->dead) {
4014                                         sk->state_change(sk);
4015                                 }
4016                                 kfree_skb(skb, FREE_READ);
4017                                 release_sock(sk);
4018                                 return(0);
4019                         }
4020         
4021                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4022                                 kfree_skb(skb, FREE_READ);
4023                                 release_sock(sk);
4024                                 return(0);
4025                         }
4026         
4027                         if (tcp_urg(sk, th, saddr, len)) {
4028                                 kfree_skb(skb, FREE_READ);
4029                                 release_sock(sk);
4030                                 return(0);
4031                         }
4032 
4033         
4034                         if (tcp_data(skb, sk, saddr, len)) {
4035                                 kfree_skb(skb, FREE_READ);
4036                                 release_sock(sk);
4037                                 return(0);
4038                         }       
4039 
4040                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4041                                 kfree_skb(skb, FREE_READ);
4042                                 release_sock(sk);
4043                                 return(0);
4044                         }
4045         
4046                         release_sock(sk);
4047                         return(0);
4048                 
4049                 case TCP_CLOSE:
4050                         if (sk->dead || sk->daddr) {
4051                                 kfree_skb(skb, FREE_READ);
4052                                         release_sock(sk);
4053                                 return(0);
4054                         }
4055         
4056                         if (!th->rst) {
4057                                 if (!th->ack)
4058                                         th->ack_seq = 0;
4059                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4060                         }
4061                         kfree_skb(skb, FREE_READ);
4062                         release_sock(sk);
4063                                 return(0);
4064         
4065                 case TCP_LISTEN:
4066                         if (th->rst) {
4067                                 kfree_skb(skb, FREE_READ);
4068                                 release_sock(sk);
4069                                 return(0);
4070                         }
4071                         if (th->ack) {
4072                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4073                                 kfree_skb(skb, FREE_READ);
4074                                 release_sock(sk);
4075                                 return(0);
4076                         }
4077         
4078                         if (th->syn) 
4079                         {
4080                                 /*
4081                                  * Now we just put the whole thing including
4082                                  * the header and saddr, and protocol pointer
4083                                  * into the buffer.  We can't respond until the
4084                                  * user tells us to accept the connection.
4085                                  */
4086                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4087                                 release_sock(sk);
4088                                 return(0);
4089                         }
4090 
4091                         kfree_skb(skb, FREE_READ);
4092                         release_sock(sk);
4093                         return(0);
4094 
4095                 case TCP_SYN_RECV:
4096                         if (th->syn) {
4097                                 /* Probably a retransmitted syn */
4098                                 kfree_skb(skb, FREE_READ);
4099                                 release_sock(sk);
4100                                 return(0);
4101                         }
4102         
4103         
4104                 default:
4105                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4106                         {
4107                                 kfree_skb(skb, FREE_READ);
4108                                 release_sock(sk);
4109                                 return(0);
4110                         }
4111         
4112                 case TCP_SYN_SENT:
4113                         if (th->rst) 
4114                         {
4115                                 tcp_statistics.TcpAttemptFails++;
4116                                 sk->err = ECONNREFUSED;
4117                                 tcp_set_state(sk,TCP_CLOSE);
4118                                 sk->shutdown = SHUTDOWN_MASK;
4119                                 sk->zapped = 1;
4120                                 if (!sk->dead) 
4121                                 {
4122                                         sk->state_change(sk);
4123                                 }
4124                                 kfree_skb(skb, FREE_READ);
4125                                 release_sock(sk);
4126                                 return(0);
4127                         }
4128                         if (!th->ack) 
4129                         {
4130                                 if (th->syn) 
4131                                 {
4132                                         tcp_set_state(sk,TCP_SYN_RECV);
4133                                 }
4134                                 kfree_skb(skb, FREE_READ);
4135                                 release_sock(sk);
4136                                 return(0);
4137                         }
4138         
4139                         switch(sk->state) 
4140                         {
4141                                 case TCP_SYN_SENT:
4142                                         if (!tcp_ack(sk, th, saddr, len)) 
4143                                         {
4144                                                 tcp_statistics.TcpAttemptFails++;
4145                                                 tcp_reset(daddr, saddr, th,
4146                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4147                                                 kfree_skb(skb, FREE_READ);
4148                                                         release_sock(sk);
4149                                                 return(0);
4150                                         }
4151         
4152                                         /*
4153                                          * If the syn bit is also set, switch to
4154                                          * tcp_syn_recv, and then to established.
4155                                          */
4156                                         if (!th->syn) 
4157                                         {
4158                                                 kfree_skb(skb, FREE_READ);
4159                                                 release_sock(sk);
4160                                                 return(0);
4161                                         }
4162         
4163                                         /* Ack the syn and fall through. */
4164                                         sk->acked_seq = th->seq+1;
4165                                         sk->fin_seq = th->seq;
4166                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4167                                                 sk, th, sk->daddr);
4168                 
4169                                 case TCP_SYN_RECV:
4170                                         if (!tcp_ack(sk, th, saddr, len)) 
4171                                         {
4172                                                 tcp_statistics.TcpAttemptFails++;
4173                                                 tcp_reset(daddr, saddr, th,
4174                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4175                                                 kfree_skb(skb, FREE_READ);
4176                                                 release_sock(sk);
4177                                                 return(0);
4178                                         }
4179         
4180                                         tcp_set_state(sk,TCP_ESTABLISHED);
4181         
4182                                         /*
4183                                          *      Now we need to finish filling out
4184                                          *      some of the tcp header.
4185                                          * 
4186                                          *      We need to check for mtu info. 
4187                                          */
4188                                         tcp_options(sk, th);
4189                                         sk->dummy_th.dest = th->source;
4190                                         sk->copied_seq = sk->acked_seq-1;
4191                                         if (!sk->dead) 
4192                                         {
4193                                                 sk->state_change(sk);
4194                                         }
4195         
4196                                         /*
4197                                          * We've already processed his first
4198                                          * ack.  In just about all cases that
4199                                          * will have set max_window.  This is
4200                                          * to protect us against the possibility
4201                                          * that the initial window he sent was 0.
4202                                          * This must occur after tcp_options, which
4203                                          * sets sk->mtu.
4204                                          */
4205                                         if (sk->max_window == 0) 
4206                                         {
4207                                                 sk->max_window = 32;
4208                                                 sk->mss = min(sk->max_window, sk->mtu);
4209                                         }
4210 
4211                                         /*
4212                                          * Now process the rest like we were
4213                                          * already in the established state.
4214                                          */
4215                                         if (th->urg) 
4216                                         {
4217                                                 if (tcp_urg(sk, th, saddr, len)) 
4218                                                 { 
4219                                                         kfree_skb(skb, FREE_READ);
4220                                                         release_sock(sk);
4221                                                         return(0);
4222                                                 }
4223                                         }
4224                                         if (tcp_data(skb, sk, saddr, len))
4225                                                 kfree_skb(skb, FREE_READ);
4226 
4227                                         if (th->fin)
4228                                                 tcp_fin(skb, sk, th, saddr, dev);
4229                                         release_sock(sk);
4230                                         return(0);
4231                         }
4232         
4233                         if (th->urg) 
4234                         {
4235                                 if (tcp_urg(sk, th, saddr, len)) 
4236                                 {
4237                                         kfree_skb(skb, FREE_READ);
4238                                         release_sock(sk);
4239                                         return(0);
4240                                 }
4241                         }
4242                         if (tcp_data(skb, sk, saddr, len)) 
4243                         {
4244                                 kfree_skb(skb, FREE_READ);
4245                                 release_sock(sk);
4246                                 return(0);
4247                         }
4248         
4249                         if (!th->fin) 
4250                         {
4251                                 release_sock(sk);
4252                                 return(0);
4253                         }
4254                         tcp_fin(skb, sk, th, saddr, dev);
4255                         release_sock(sk);
4256                         return(0);
4257         }
4258 }
4259 
4260 
4261 /*
4262  * This routine sends a packet with an out of date sequence
4263  * number. It assumes the other end will try to ack it.
4264  */
4265 
4266 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4267 {
4268         struct sk_buff *buff;
4269         struct tcphdr *t1;
4270         struct device *dev=NULL;
4271         int tmp;
4272 
4273         if (sk->zapped)
4274                 return; /* After a valid reset we can send no more */
4275 
4276         /*
4277          * Write data can still be transmitted/retransmitted in the
4278          * following states.  If any other state is encountered, return.
4279          */
4280 
4281         if (sk->state != TCP_ESTABLISHED && 
4282             sk->state != TCP_CLOSE_WAIT &&
4283             sk->state != TCP_FIN_WAIT1 && 
4284             sk->state != TCP_LAST_ACK &&
4285             sk->state != TCP_CLOSING
4286         ) {
4287                 return;
4288         }
4289 
4290         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4291         if (buff == NULL) 
4292                 return;
4293 
4294         buff->len = sizeof(struct tcphdr);
4295         buff->free = 1;
4296         buff->sk = sk;
4297         buff->localroute = sk->localroute;
4298 
4299         t1 = (struct tcphdr *) buff->data;
4300 
4301         /* Put in the IP header and routing stuff. */
4302         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4303                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4304         if (tmp < 0) 
4305         {
4306                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4307                 return;
4308         }
4309 
4310         buff->len += tmp;
4311         t1 = (struct tcphdr *)((char *)t1 +tmp);
4312 
4313         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4314 
4315         /*
4316          * Use a previous sequence.
4317          * This should cause the other end to send an ack.
4318          */
4319         t1->seq = htonl(sk->sent_seq-1);
4320         t1->ack = 1; 
4321         t1->res1= 0;
4322         t1->res2= 0;
4323         t1->rst = 0;
4324         t1->urg = 0;
4325         t1->psh = 0;
4326         t1->fin = 0;
4327         t1->syn = 0;
4328         t1->ack_seq = ntohl(sk->acked_seq);
4329         t1->window = ntohs(tcp_select_window(sk));
4330         t1->doff = sizeof(*t1)/4;
4331         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4332 
4333          /*     Send it and free it.
4334           *     This will prevent the timer from automatically being restarted.
4335           */
4336         sk->prot->queue_xmit(sk, dev, buff, 1);
4337         tcp_statistics.TcpOutSegs++;
4338 }
4339 
4340 void
4341 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4342 {
4343         if (sk->zapped)
4344                 return;         /* After a valid reset we can send no more */
4345 
4346         tcp_write_wakeup(sk);
4347 
4348         sk->backoff++;
4349         sk->rto = min(sk->rto << 1, 120*HZ);
4350         reset_timer (sk, TIME_PROBE0, sk->rto);
4351         sk->retransmits++;
4352         sk->prot->retransmits ++;
4353 }
4354 
4355 /*
4356  *      Socket option code for TCP. 
4357  */
4358   
4359 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4360 {
4361         int val,err;
4362 
4363         if(level!=SOL_TCP)
4364                 return ip_setsockopt(sk,level,optname,optval,optlen);
4365 
4366         if (optval == NULL) 
4367                 return(-EINVAL);
4368 
4369         err=verify_area(VERIFY_READ, optval, sizeof(int));
4370         if(err)
4371                 return err;
4372         
4373         val = get_fs_long((unsigned long *)optval);
4374 
4375         switch(optname)
4376         {
4377                 case TCP_MAXSEG:
4378 /*
4379  * values greater than interface MTU won't take effect.  however at
4380  * the point when this call is done we typically don't yet know
4381  * which interface is going to be used
4382  */
4383                         if(val<1||val>MAX_WINDOW)
4384                                 return -EINVAL;
4385                         sk->user_mss=val;
4386                         return 0;
4387                 case TCP_NODELAY:
4388                         sk->nonagle=(val==0)?0:1;
4389                         return 0;
4390                 default:
4391                         return(-ENOPROTOOPT);
4392         }
4393 }
4394 
4395 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4396 {
4397         int val,err;
4398 
4399         if(level!=SOL_TCP)
4400                 return ip_getsockopt(sk,level,optname,optval,optlen);
4401                         
4402         switch(optname)
4403         {
4404                 case TCP_MAXSEG:
4405                         val=sk->user_mss;
4406                         break;
4407                 case TCP_NODELAY:
4408                         val=sk->nonagle;        /* Until Johannes stuff is in */
4409                         break;
4410                 default:
4411                         return(-ENOPROTOOPT);
4412         }
4413         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4414         if(err)
4415                 return err;
4416         put_fs_long(sizeof(int),(unsigned long *) optlen);
4417 
4418         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4419         if(err)
4420                 return err;
4421         put_fs_long(val,(unsigned long *)optval);
4422 
4423         return(0);
4424 }       
4425 
4426 
/*
 *      Protocol operations table for TCP.
 *
 *      The initializer is positional, so the order of the entries
 *      must match the member order of struct proto (declared outside
 *      this part of the file).  The tcp_* entries are the handlers of
 *      this file; the sock_* and ip_* entries are defined elsewhere.
 */
4427 struct proto tcp_prot = {
4428         sock_wmalloc,
4429         sock_rmalloc,
4430         sock_wfree,
4431         sock_rfree,
4432         sock_rspace,
4433         sock_wspace,
4434         tcp_close,
4435         tcp_read,
4436         tcp_write,
4437         tcp_sendto,
4438         tcp_recvfrom,
4439         ip_build_header,
4440         tcp_connect,
4441         tcp_accept,
4442         ip_queue_xmit,
4443         tcp_retransmit,
4444         tcp_write_wakeup,
4445         tcp_read_wakeup,
4446         tcp_rcv,
4447         tcp_select,
4448         tcp_ioctl,
4449         NULL,   /* NOTE(review): unused hook, presumably the init slot - confirm against struct proto */
4450         tcp_shutdown,
4451         tcp_setsockopt,
4452         tcp_getsockopt,
4453         128,    /* NOTE(review): presumably the maximum header size - confirm against struct proto */
4454         0,      /* NOTE(review): presumably the retransmits counter bumped by tcp_send_probe0 - confirm */
4455         {NULL,},        /* NOTE(review): presumably the empty per-protocol socket table - confirm */
4456         "TCP"   /* protocol name string */
4457 };

/* [previous][next][first][last][top][bottom][index][help] */