root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_dequeue_established
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. tcp_send_skb
  15. tcp_dequeue_partial
  16. tcp_send_partial
  17. tcp_enqueue_partial
  18. tcp_send_ack
  19. tcp_build_header
  20. tcp_write
  21. tcp_sendto
  22. tcp_read_wakeup
  23. cleanup_rbuf
  24. tcp_read_urg
  25. tcp_read
  26. tcp_shutdown
  27. tcp_recvfrom
  28. tcp_reset
  29. tcp_options
  30. default_mask
  31. tcp_conn_request
  32. tcp_close
  33. tcp_write_xmit
  34. sort_send
  35. tcp_ack
  36. tcp_data
  37. tcp_check_urg
  38. tcp_urg
  39. tcp_fin
  40. tcp_accept
  41. tcp_connect
  42. tcp_sequence
  43. tcp_clean_end
  44. tcp_rcv
  45. tcp_write_wakeup
  46. tcp_send_probe0
  47. tcp_setsockopt
  48. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
  83  *              Matt Dillon     :       Yet more small nasties removed from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *      Peter De Schrijver      :       ENOTCONN check missing in tcp_sendto().
  87  *
  88  *
  89  * To Fix:
  90  *                      Fast path the code. Two things here - fix the window calculation
  91  *              so it doesn't iterate over the queue, also spot packets with no funny
  92  *              options arriving in order and process directly.
  93  *
  94  *              This program is free software; you can redistribute it and/or
  95  *              modify it under the terms of the GNU General Public License
  96  *              as published by the Free Software Foundation; either version
  97  *              2 of the License, or(at your option) any later version.
  98  *
  99  * Description of States:
 100  *
 101  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 102  *
 103  *      TCP_SYN_RECV            received a connection request, sent ack,
 104  *                              waiting for final ack in three-way handshake.
 105  *
 106  *      TCP_ESTABLISHED         connection established
 107  *
 108  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 109  *                              transmission of remaining buffered data
 110  *
 111  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 112  *                              to shutdown
 113  *
 114  *      TCP_CLOSING             both sides have shutdown but we still have
 115  *                              data we have to finish sending
 116  *
 117  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 118  *                              closed, can only be entered from FIN_WAIT2
 119  *                              or CLOSING.  Required because the other end
 120  *                              may not have gotten our last ACK causing it
 121  *                              to retransmit the data packet (which we ignore)
 122  *
 123  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 124  *                              us to finish writing our data and to shutdown
 125  *                              (we have to close() to move on to LAST_ACK)
 126  *
 127  *      TCP_LAST_ACK            our side has shutdown after remote has
 128  *                              shutdown.  There may still be data in our
 129  *                              buffer that we have to finish sending
 130  *              
 131  *      TCP_CLOSE               socket is finished
 132  */
 133 #include <linux/types.h>
 134 #include <linux/sched.h>
 135 #include <linux/mm.h>
 136 #include <linux/string.h>
 137 #include <linux/socket.h>
 138 #include <linux/sockios.h>
 139 #include <linux/termios.h>
 140 #include <linux/in.h>
 141 #include <linux/fcntl.h>
 142 #include <linux/inet.h>
 143 #include <linux/netdevice.h>
 144 #include "snmp.h"
 145 #include "ip.h"
 146 #include "protocol.h"
 147 #include "icmp.h"
 148 #include "tcp.h"
 149 #include <linux/skbuff.h>
 150 #include "sock.h"
 151 #include "route.h"
 152 #include <linux/errno.h>
 153 #include <linux/timer.h>
 154 #include <asm/system.h>
 155 #include <asm/segment.h>
 156 #include <linux/mm.h>
 157 
     /* TCP_FASTPATH is forced off here, so the #ifdef TCP_FASTPATH section below is compiled out. */
 158 #undef TCP_FASTPATH
 159 
     /*
      * NOTE(review): SEQ_TICK and seq_offset are not referenced in the visible
      * part of this file; presumably they feed initial sequence number
      * generation -- confirm against their users.
      */
 160 #define SEQ_TICK 3
 161 unsigned long seq_offset;
     /* TCP MIB counters; this file updates fields such as TcpCurrEstab, TcpAttemptFails and TcpOutSegs. */
 162 struct tcp_mib  tcp_statistics;
 163 
 164 #ifdef TCP_FASTPATH
     /* Only built when TCP_FASTPATH is defined. NOTE(review): hit/miss meaning is inferred from the names -- confirm. */
 165 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 166 #endif
 167 
 168 
 169 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 170 {
 171         if (a < b) 
 172                 return(a);
 173         return(b);
 174 }
 175 
 176 #undef STATE_TRACE
 177 
 178 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 179 {
 180         if(sk->state==TCP_ESTABLISHED)
 181                 tcp_statistics.TcpCurrEstab--;
 182 #ifdef STATE_TRACE
 183         if(sk->debug)
 184                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 185 #endif  
 186         sk->state=state;
 187         if(state==TCP_ESTABLISHED)
 188                 tcp_statistics.TcpCurrEstab++;
 189 }
 190 
 191 /* This routine picks a TCP windows for a socket based on
 192    the following constraints
 193    
 194    1. The window can never be shrunk once it is offered (RFC 793)
 195    2. We limit memory per socket
 196    
 197    For now we use NET2E3's heuristic of offering half the memory
 198    we have handy. All is not as bad as this seems however because
 199    of two things. Firstly we will bin packets even within the window
 200    in order to get the data we are waiting for into the memory limit.
 201    Secondly we bin common duplicate forms at receive time
 202    
 203    Better heuristics welcome
 204 */
 205    
 206 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 207 {
 208         int new_window = sk->prot->rspace(sk);
 209         
 210         if(sk->window_clamp)
 211                 new_window=min(sk->window_clamp,new_window);
 212 /*
 213  * two things are going on here.  First, we don't ever offer a
 214  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 215  * receiver side of SWS as specified in RFC1122.
 216  * Second, we always give them at least the window they
 217  * had before, in order to avoid retracting window.  This
 218  * is technically allowed, but RFC1122 advises against it and
 219  * in practice it causes trouble.
 220  */
 221         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 222                 return(sk->window);
 223         return(new_window);
 224 }
 225 
 226 /*
 227  *      Find someone to 'accept'. Must be called with
 228  *      sk->inuse=1 or cli()
 229  */ 
 230 
 231 static struct sk_buff *tcp_find_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 232 {
 233         struct sk_buff *p=skb_peek(&s->receive_queue);
 234         if(p==NULL)
 235                 return NULL;
 236         do
 237         {
 238                 if(p->sk->state>=TCP_ESTABLISHED)
 239                         return p;
 240                 p=p->next;
 241         }
 242         while(p!=skb_peek(&s->receive_queue));
 243         return NULL;
 244 }
 245 
 246 static struct sk_buff *tcp_dequeue_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 247 {
 248         struct sk_buff *skb;
 249         unsigned long flags;
 250         save_flags(flags);
 251         cli(); 
 252         skb=tcp_find_established(s);
 253         if(skb!=NULL)
 254                 skb_unlink(skb);        /* Take it off the queue */
 255         restore_flags(flags);
 256         return skb;
 257 }
 258 
 259 
 260 /*
 261  *      Enter the time wait state. 
 262  */
 263 
 264 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 265 {
 266         tcp_set_state(sk,TCP_TIME_WAIT);
 267         sk->shutdown = SHUTDOWN_MASK;
 268         if (!sk->dead)
 269                 sk->state_change(sk);
 270         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 271 }
 272 
 273 /*
 274  *      A timer event has trigger a tcp retransmit timeout. The
 275  *      socket xmit queue is ready and set up to send. Because
 276  *      the ack receive code keeps the queue straight we do
 277  *      nothing clever here.
 278  */
 279 
 280 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 281 {
 282         if (all) 
 283         {
 284                 ip_retransmit(sk, all);
 285                 return;
 286         }
 287 
 288         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 289         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 290         sk->cong_count = 0;
 291 
 292         sk->cong_window = 1;
 293 
 294         /* Do the actual retransmit. */
 295         ip_retransmit(sk, all);
 296 }
 297 
 298 
 299 /*
 300  * This routine is called by the ICMP module when it gets some
 301  * sort of error condition.  If err < 0 then the socket should
 302  * be closed and the error returned to the user.  If err > 0
 303  * it's just the icmp type << 8 | icmp code.  After adjustment
 304  * header points to the first 8 bytes of the tcp header.  We need
 305  * to find the appropriate port.
 306  */
 307 
 308 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 309         unsigned long saddr, struct inet_protocol *protocol)
 310 {
 311         struct tcphdr *th;
 312         struct sock *sk;
 313         struct iphdr *iph=(struct iphdr *)header;
 314   
 315         header+=4*iph->ihl;
 316    
 317 
 318         th =(struct tcphdr *)header;
 319         sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 320 
 321         if (sk == NULL) 
 322                 return;
 323   
 324         if(err<0)
 325         {
 326                 sk->err = -err;
 327                 sk->error_report(sk);
 328                 return;
 329         }
 330 
 331         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 332         {
 333                 /*
 334                  * FIXME:
 335                  * For now we will just trigger a linear backoff.
 336                  * The slow start code should cause a real backoff here.
 337                  */
 338                 if (sk->cong_window > 4)
 339                         sk->cong_window--;
 340                 return;
 341         }
 342 
 343 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 344 
 345         /*
 346          * If we've already connected we will keep trying
 347          * until we time out, or the user gives up.
 348          */
 349 
 350         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 351         {
 352                 if (sk->state == TCP_SYN_SENT) 
 353                 {
 354                         tcp_statistics.TcpAttemptFails++;
 355                         tcp_set_state(sk,TCP_CLOSE);
 356                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 357                 }
 358                 sk->err = icmp_err_convert[err & 0xff].errno;           
 359         }
 360         return;
 361 }
 362 
 363 
 364 /*
 365  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 366  *      in the received data queue (ie a frame missing that needs sending to us)
      *
      *      Returns the number of bytes counted, or 0 when sk is NULL or the
      *      receive queue is empty.  Counting starts at sk->copied_seq+1 and
      *      also stops after a segment carrying PSH once something has been
      *      counted.  A SYN advances the position but is not counted as data,
      *      and one byte is dropped from the total for urgent data that falls
      *      inside the counted range when it is not delivered inline.
      *      The queue is walked between save_flags()/cli() and restore_flags().
 367  */
 368 
 369 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 370 {
 371         unsigned long counted;
 372         unsigned long amount;
 373         struct sk_buff *skb;
 374         int sum;
 375         unsigned long flags;
 376 
 377         if(sk && sk->debug)
 378                 printk("tcp_readable: %p - ",sk);
 379 
 380         save_flags(flags);
 381         cli();
 382         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 383         {
 384                 restore_flags(flags);
 385                 if(sk && sk->debug) 
 386                         printk("empty\n");
 387                 return(0);
 388         }
 389   
 390         counted = sk->copied_seq+1;     /* Where we are at the moment */
 391         amount = 0;
 392   
 393         /* Do until a push or until we are out of data. */
 394         do 
 395         {
 396                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 397                         break;
                     /* sum = part of this buffer lying beyond the position already counted; <= 0 means nothing new */
 398                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
                     /* the SYN takes one step in 'counted' but is removed from 'amount' again below */
 399                 if (skb->h.th->syn)
 400                         sum++;
 401                 if (sum > 0) 
 402                 {                                       /* Add it up, move on */
 403                         amount += sum;
 404                         if (skb->h.th->syn) 
 405                                 amount--;
 406                         counted += sum;
 407                 }
 408                 if (amount && skb->h.th->psh) break;
 409                 skb = skb->next;
 410         }
             /* the walk ends when it gets back to the list head itself */
 411         while(skb != (struct sk_buff *)&sk->receive_queue);
 412 
             /* urgent byte inside the counted range and not delivered inline: leave it out */
 413         if (amount && !sk->urginline && sk->urg_data &&
 414             (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 415                 amount--;               /* don't count urg data */
 416         restore_flags(flags);
 417         if(sk->debug)
 418                 printk("got %lu bytes.\n",amount);
 419         return(amount);
 420 }
 421 
 422 
 423 /*
 424  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 425  *      listening socket has a receive queue of sockets to accept.
 426  */
 427 
 428 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 429 {
 430         sk->inuse = 1;
 431 
 432         switch(sel_type) 
 433         {
 434                 case SEL_IN:
 435                         if(sk->debug)
 436                                 printk("select in");
 437                         select_wait(sk->sleep, wait);
 438                         if(sk->debug)
 439                                 printk("-select out");
 440                         if (skb_peek(&sk->receive_queue) != NULL) 
 441                         {
 442                                 if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) 
 443                                 {
 444                                         release_sock(sk);
 445                                         if(sk->debug)
 446                                                 printk("-select ok data\n");
 447                                         return(1);
 448                                 }
 449                         }
 450                         if (sk->err != 0)       /* Receiver error */
 451                         {
 452                                 release_sock(sk);
 453                                 if(sk->debug)
 454                                         printk("-select ok error");
 455                                 return(1);
 456                         }
 457                         if (sk->shutdown & RCV_SHUTDOWN) 
 458                         {
 459                                 release_sock(sk);
 460                                 if(sk->debug)
 461                                         printk("-select ok down\n");
 462                                 return(1);
 463                         } 
 464                         else 
 465                         {
 466                                 release_sock(sk);
 467                                 if(sk->debug)
 468                                         printk("-select fail\n");
 469                                 return(0);
 470                         }
 471                 case SEL_OUT:
 472                         select_wait(sk->sleep, wait);
 473                         if (sk->shutdown & SEND_SHUTDOWN) 
 474                         {
 475                                 /* FIXME: should this return an error? */
 476                                 release_sock(sk);
 477                                 return(0);
 478                         }
 479 
 480                         /*
 481                          * This is now right thanks to a small fix
 482                          * by Matt Dillon.
 483                          */
 484                         
 485                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 486                         {
 487                                 release_sock(sk);
 488                                 /* This should cause connect to work ok. */
 489                                 if (sk->state == TCP_SYN_RECV ||
 490                                     sk->state == TCP_SYN_SENT) return(0);
 491                                 return(1);
 492                         }
 493                         release_sock(sk);
 494                         return(0);
 495                 case SEL_EX:
 496                         select_wait(sk->sleep,wait);
 497                         if (sk->err || sk->urg_data) 
 498                         {
 499                                 release_sock(sk);
 500                                 return(1);
 501                         }
 502                         release_sock(sk);
 503                         return(0);
 504         }
 505 
 506         release_sock(sk);
 507         return(0);
 508 }
 509 
 510 
 511 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 512 {
 513         int err;
 514         switch(cmd) 
 515         {
 516 
 517                 case TIOCINQ:
 518 #ifdef FIXME    /* FIXME: */
 519                 case FIONREAD:
 520 #endif
 521                 {
 522                         unsigned long amount;
 523 
 524                         if (sk->state == TCP_LISTEN) 
 525                                 return(-EINVAL);
 526 
 527                         sk->inuse = 1;
 528                         amount = tcp_readable(sk);
 529                         release_sock(sk);
 530                         err=verify_area(VERIFY_WRITE,(void *)arg,
 531                                                    sizeof(unsigned long));
 532                         if(err)
 533                                 return err;
 534                         put_fs_long(amount,(unsigned long *)arg);
 535                         return(0);
 536                 }
 537                 case SIOCATMARK:
 538                 {
 539                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 540 
 541                         err = verify_area(VERIFY_WRITE,(void *) arg,
 542                                                   sizeof(unsigned long));
 543                         if (err)
 544                                 return err;
 545                         put_fs_long(answ,(int *) arg);
 546                         return(0);
 547                 }
 548                 case TIOCOUTQ:
 549                 {
 550                         unsigned long amount;
 551 
 552                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 553                         amount = sk->prot->wspace(sk);
 554                         err=verify_area(VERIFY_WRITE,(void *)arg,
 555                                                    sizeof(unsigned long));
 556                         if(err)
 557                                 return err;
 558                         put_fs_long(amount,(unsigned long *)arg);
 559                         return(0);
 560                 }
 561                 default:
 562                         return(-EINVAL);
 563         }
 564 }
 565 
 566 
 567 /*
 568  *      This routine computes a TCP checksum. 
      *
      *      th      start of the TCP header; 'len' bytes from here are summed
      *      len     number of bytes to sum
      *      saddr   source address; 0 means use ip_my_addr()
      *      daddr   destination address
      *
      *      Returns the one's complement of the folded sum, in the low 16 bits.
      *      The summing is written as i386 inline assembly.
 569  */
 570  
 571 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 572           unsigned long saddr, unsigned long daddr)
 573 {     
 574         unsigned long sum;
 575    
 576         if (saddr == 0) saddr = ip_my_addr();
 577 
 578 /*
 579  * stupid, gcc complains when I use just one __asm__ block,
 580  * something about too many reloads, but this is just two
 581  * instructions longer than what I want
 582  */
             /*
              * First block: start the sum from the address pair and a word
              * built from the length and IPPROTO_TCP, adding the carry back in.
              */
 583         __asm__("
 584             addl %%ecx, %%ebx
 585             adcl %%edx, %%ebx
 586             adcl $0, %%ebx
 587             "
 588         : "=b"(sum)
 589         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 590         : "bx", "cx", "dx" );
             /*
              * Second block: add the 'len' bytes at 'th'.  Groups of 32 bytes
              * go through the unrolled loop at label 1, remaining 4 byte words
              * through the loop at label 3, then a trailing 2 byte word and a
              * trailing byte if 'len' has those bits set.  At label 6 the
              * upper 16 bits are folded into the lower 16.
              */
 591         __asm__("
 592             movl %%ecx, %%edx
 593             cld
 594             cmpl $32, %%ecx
 595             jb 2f
 596             shrl $5, %%ecx
 597             clc
 598 1:          lodsl
 599             adcl %%eax, %%ebx
 600             lodsl
 601             adcl %%eax, %%ebx
 602             lodsl
 603             adcl %%eax, %%ebx
 604             lodsl
 605             adcl %%eax, %%ebx
 606             lodsl
 607             adcl %%eax, %%ebx
 608             lodsl
 609             adcl %%eax, %%ebx
 610             lodsl
 611             adcl %%eax, %%ebx
 612             lodsl
 613             adcl %%eax, %%ebx
 614             loop 1b
 615             adcl $0, %%ebx
 616             movl %%edx, %%ecx
 617 2:          andl $28, %%ecx
 618             je 4f
 619             shrl $2, %%ecx
 620             clc
 621 3:          lodsl
 622             adcl %%eax, %%ebx
 623             loop 3b
 624             adcl $0, %%ebx
 625 4:          movl $0, %%eax
 626             testw $2, %%dx
 627             je 5f
 628             lodsw
 629             addl %%eax, %%ebx
 630             adcl $0, %%ebx
 631             movw $0, %%ax
 632 5:          test $1, %%edx
 633             je 6f
 634             lodsb
 635             addl %%eax, %%ebx
 636             adcl $0, %%ebx
 637 6:          movl %%ebx, %%eax
 638             shrl $16, %%eax
 639             addw %%ax, %%bx
 640             adcw $0, %%bx
 641             "
 642         : "=b"(sum)
 643         : "0"(sum), "c"(len), "S"(th)
 644         : "ax", "bx", "cx", "dx", "si" );
 645 
 646         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 647   
 648         return((~sum) & 0xffff);
 649 }
 650 
 651 
 652 
 653 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 654                 unsigned long daddr, int len, struct sock *sk)
 655 {
 656         th->check = 0;
 657         th->check = tcp_check(th, len, saddr, daddr);
 658         return;
 659 }
 660 
 661 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 662 {
 663         int size;
 664         struct tcphdr * th = skb->h.th;
 665 
 666         /* length of packet (not counting length of pre-tcp headers) */
 667         size = skb->len - ((unsigned char *) th - skb->data);
 668 
 669         /* sanity check it.. */
 670         if (size < sizeof(struct tcphdr) || size > skb->len) 
 671         {
 672                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 673                         skb, skb->data, th, skb->len);
 674                 kfree_skb(skb, FREE_WRITE);
 675                 return;
 676         }
 677 
 678         /* If we have queued a header size packet.. */
 679         if (size == sizeof(struct tcphdr)) 
 680         {
 681                 /* If its got a syn or fin its notionally included in the size..*/
 682                 if(!th->syn && !th->fin) 
 683                 {
 684                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 685                         kfree_skb(skb,FREE_WRITE);
 686                         return;
 687                 }
 688         }
 689 
 690         tcp_statistics.TcpOutSegs++;  
 691 
 692         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 693         if (after(skb->h.seq, sk->window_seq) ||
 694             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 695              sk->packets_out >= sk->cong_window) 
 696         {
 697                 /* checksum will be supplied by tcp_write_xmit.  So
 698                  * we shouldn't need to set it at all.  I'm being paranoid */
 699                 th->check = 0;
 700                 if (skb->next != NULL) 
 701                 {
 702                         printk("tcp_send_partial: next != NULL\n");
 703                         skb_unlink(skb);
 704                 }
 705                 skb_queue_tail(&sk->write_queue, skb);
 706                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 707                     sk->send_head == NULL &&
 708                     sk->ack_backlog == 0)
 709                         reset_timer(sk, TIME_PROBE0, sk->rto);
 710         } 
 711         else 
 712         {
 713                 th->ack_seq = ntohl(sk->acked_seq);
 714                 th->window = ntohs(tcp_select_window(sk));
 715 
 716                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 717 
 718                 sk->sent_seq = sk->write_seq;
 719                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 720         }
 721 }
 722 
 723 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      *      Take the pending partial packet off the socket.
      *
      *      With interrupts disabled, sk->partial is read; if it is set,
      *      sk->partial_timer is removed and the slot is cleared.  The
      *      saved interrupt flags are restored before returning.
      *
      *      Returns the detached buffer, or NULL if none was pending.
      */
 724 {
 725         struct sk_buff *pending;
 726         unsigned long irq_state;
 727 
 728         save_flags(irq_state);
 729         cli();
 730         if ((pending = sk->partial) != NULL)
 731         {
 732                 del_timer(&sk->partial_timer);
 733                 sk->partial = NULL;
 734         }
 735         restore_flags(irq_state);
 736         return(pending);
 737 }
 738 
 739 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      *      Transmit whatever partial packet is parked on the socket.
      *
      *      Does nothing for a NULL socket.  Otherwise packets are pulled
      *      with tcp_dequeue_partial() and handed to tcp_send_skb() until
      *      the dequeue returns NULL.
      */
 740 {
 741         struct sk_buff *pending = NULL;
 742 
 743         if (sk != NULL)
 744                 pending = tcp_dequeue_partial(sk);
 745         for ( ; pending != NULL; pending = tcp_dequeue_partial(sk))
 746                 tcp_send_skb(sk, pending);
 747 }
 748 
 749 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      *      Park skb on the socket as its pending partial packet and
      *      (re)start sk->partial_timer, whose handler is
      *      tcp_send_partial() called with the socket as its argument.
      *
      *      skb       packet to park in sk->partial
      *      sk        owning socket
      *
      *      If another packet was already parked, its timer is removed
      *      and that packet is sent with tcp_send_skb() once the saved
      *      interrupt flags have been restored.
      */
 750 {
 751         struct sk_buff * tmp;
 752         unsigned long flags;
 753 
 754         save_flags(flags);
 755         cli();
             /* Remember any previously parked packet and stop its timer. */
 756         tmp = sk->partial;
 757         if (tmp)
 758                 del_timer(&sk->partial_timer);
 759         sk->partial = skb;
             /*
              * Arm a fresh timer for the new packet.
              * NOTE(review): expires = HZ is presumably a one second
              * delay -- confirm the units add_timer() expects.
              */
 760         init_timer(&sk->partial_timer);
 761         sk->partial_timer.expires = HZ;
 762         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 763         sk->partial_timer.data = (unsigned long) sk;
 764         add_timer(&sk->partial_timer);
 765         restore_flags(flags);
             /* The displaced packet is transmitted outside the cli section. */
 766         if (tmp)
 767                 tcp_send_skb(sk, tmp);
 768 }
 769 
 770 
 771 /*
 772  *      This routine sends an ack and also updates the window. 
      *
      *      sequence  value placed in the header's seq field
      *      ack       value placed in the header's ack_seq field
      *      sk        socket the ack is sent for
      *      th        header being answered; the new header starts as a
      *                copy of it with source and dest ports swapped
      *      daddr     destination address given to build_header() and to
      *                the checksum routine
      *
      *      Nothing is sent if sk->zapped is set.  If no buffer can be
      *      allocated, sk->ack_backlog is incremented and, for a connected
      *      socket whose timer is not already TIME_WRITE, the timer is
      *      reset to TIME_WRITE.  If build_header() fails the buffer is
      *      freed and the routine returns.
 773  */
 774  
 775 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 776              struct sock *sk,
 777              struct tcphdr *th, unsigned long daddr)
 778 {
 779         struct sk_buff *buff;
 780         struct tcphdr *t1;
 781         struct device *dev = NULL;
 782         int tmp;
 783 
 784         if(sk->zapped)
 785                 return;         /* We have been reset, we may not send again */
 786         /*
 787          * We need to grab some memory, and put together an ack,
 788          * and then put it into the queue to be sent.
 789          */
 790 
 791         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 792         if (buff == NULL) 
 793         {
 794                 /* Force it to send an ack. */
 795                 sk->ack_backlog++;
 796                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 797                 {
 798                         reset_timer(sk, TIME_WRITE, 10);
 799                 }
 800                 return;
 801         }
 802 
 803         buff->len = sizeof(struct tcphdr);
 804         buff->sk = sk;
 805         buff->localroute = sk->localroute;
 806         t1 =(struct tcphdr *) buff->data;
 807 
 808         /* Put in the IP header and routing stuff. */
 809         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 810                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 811         if (tmp < 0) 
 812         {
 813                 buff->free=1;
 814                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 815                 return;
 816         }
             /*
              * build_header() returned the number of bytes it used; the TCP
              * header is placed that many bytes into the buffer.
              */
 817         buff->len += tmp;
 818         t1 =(struct tcphdr *)((char *)t1 +tmp);
 819 
 820         /* FIXME: */
 821         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 822 
 823         /*
 824          *      Swap the send and the receive. 
 825          */
 826          
 827         t1->dest = th->source;
 828         t1->source = th->dest;
             /*
              * NOTE(review): ntohl()/ntohs() are applied to host-order values
              * in this routine; presumably they perform the same byte swap
              * as htonl()/htons() -- confirm.
              */
 829         t1->seq = ntohl(sequence);
 830         t1->ack = 1;
 831         sk->window = tcp_select_window(sk);
 832         t1->window = ntohs(sk->window);
             /* Clear every flag inherited from the copied header except ack. */
 833         t1->res1 = 0;
 834         t1->res2 = 0;
 835         t1->rst = 0;
 836         t1->urg = 0;
 837         t1->syn = 0;
 838         t1->psh = 0;
 839         t1->fin = 0;
             /*
              * When the ack number equals sk->acked_seq the socket's
              * pending-ack counters are cleared.  If, in addition, send_head
              * and write_queue are both empty and the socket timer is in
              * TIME_WRITE mode, the timer is switched to TIME_KEEPOPEN
              * (keepopen set) or deleted.
              */
 840         if (ack == sk->acked_seq) 
 841         {
 842                 sk->ack_backlog = 0;
 843                 sk->bytes_rcv = 0;
 844                 sk->ack_timed = 0;
 845                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 846                                   && sk->timeout == TIME_WRITE) 
 847                 {
 848                         if(sk->keepopen) {
 849                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 850                         } else {
 851                                 delete_timer(sk);
 852                         }
 853                 }
 854         }
 855         t1->ack_seq = ntohl(ack);
             /* Header length in 4-byte units; no options are added. */
 856         t1->doff = sizeof(*t1)/4;
 857         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 858         if (sk->debug)
 859                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 860         tcp_statistics.TcpOutSegs++;
 861         sk->prot->queue_xmit(sk, dev, buff, 1);
 862 }
 863 
 864 
 865 /*
 866  *      Build the generic TCP header used for outgoing data segments.
      *
      *      th        where the header is written
      *      sk        socket supplying the template header (dummy_th),
      *                the sequence numbers and the window state
      *      push      PSH is set in the header only when this is 0
      *
      *      The socket's pending-ack counters (ack_backlog, bytes_rcv,
      *      ack_timed) are cleared and sk->window is refreshed from
      *      tcp_select_window() before being written into the header.
      *
      *      Returns the header length, sizeof(struct tcphdr).
 867  */
 868 
 869 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 870 {
 871         /* FIXME: want to get rid of this. */
 872         memcpy(th, (void *) &sk->dummy_th, sizeof(struct tcphdr));
 873 
             /* Sequence numbers, converted with htonl(). */
 874         th->seq = htonl(sk->write_seq);
 875         th->ack_seq = htonl(sk->acked_seq);
 876 
             /* Header length in 4-byte units, then the flag bits. */
 877         th->doff = sizeof(struct tcphdr) / 4;
 878         th->psh = !push;
 879         th->ack = 1;
 880         th->fin = 0;
 881 
             /* Reset the socket's pending-ack bookkeeping. */
 882         sk->ack_backlog = 0;
 883         sk->bytes_rcv = 0;
 884         sk->ack_timed = 0;
 885 
             /* Advertise a freshly selected window. */
 886         sk->window = tcp_select_window(sk);
 887         th->window = htons(sk->window);
     
             return(sizeof(struct tcphdr));
     }
 888 
 889 /*
 890  *      This routine copies from a user buffer into a socket,
 891  *      and starts the transmit system.
      *
      *      sk        socket to write on
      *      from      user-space source buffer (read with memcpy_fromfs)
      *      len       number of bytes the caller wants written
      *      nonblock  when non-zero, return instead of sleeping
      *      flags     MSG_OOB marks the data urgent; MSG_DONTROUTE is
      *                OR-ed into the packet's localroute field
      *
      *      Returns the number of bytes copied.  If nothing was copied
      *      before a failure, returns instead a negative value:
      *      -sk->err, -EPIPE, -EAGAIN, -ERESTARTSYS, or the negative
      *      result of build_header().
      *
      *      The socket is marked in use on entry; every return is preceded
      *      by a release_sock() call.
 892  */
 893 
 894 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 895           int len, int nonblock, unsigned flags)
 896 {
 897         int copied = 0;
 898         int copy;
 899         int tmp;
 900         struct sk_buff *skb;
 901         struct sk_buff *send_tmp;
 902         unsigned char *buff;
 903         struct proto *prot;
 904         struct device *dev = NULL;
 905 
 906         sk->inuse=1;
 907         prot = sk->prot;
 908         while(len > 0) 
 909         {
 910                 if (sk->err) 
 911                 {                       /* Stop on an error */
 912                         release_sock(sk);
 913                         if (copied) 
 914                                 return(copied);
 915                         tmp = -sk->err;
 916                         sk->err = 0;
 917                         return(tmp);
 918                 }
 919 
 920         /*
 921          *      First thing we do is make sure that we are established. 
 922          */
 923         
 924                 if (sk->shutdown & SEND_SHUTDOWN) 
 925                 {
 926                         release_sock(sk);
 927                         sk->err = EPIPE;
 928                         if (copied) 
 929                                 return(copied);
 930                         sk->err = 0;
 931                         return(-EPIPE);
 932                 }
 933 
 934 
 935         /* 
 936          *      Wait for a connection to finish.
 937          */
 938         
 939                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 940                 {
 941                         if (sk->err) 
 942                         {
 943                                 release_sock(sk);
 944                                 if (copied) 
 945                                         return(copied);
 946                                 tmp = -sk->err;
 947                                 sk->err = 0;
 948                                 return(tmp);
 949                         }
 950 
                             /*
                              * Only SYN_SENT and SYN_RECV are worth waiting in;
                              * any other state ends the write.
                              */
 951                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 952                         {
 953                                 release_sock(sk);
 954                                 if (copied) 
 955                                         return(copied);
 956 
 957                                 if (sk->err) 
 958                                 {
 959                                         tmp = -sk->err;
 960                                         sk->err = 0;
 961                                         return(tmp);
 962                                 }
 963 
 964                                 if (sk->keepopen) 
 965                                 {
 966                                         send_sig(SIGPIPE, current, 0);
 967                                 }
 968                                 return(-EPIPE);
 969                         }
 970 
 971                         if (nonblock || copied) 
 972                         {
 973                                 release_sock(sk);
 974                                 if (copied) 
 975                                         return(copied);
 976                                 return(-EAGAIN);
 977                         }
 978 
                             /*
                              * Release the socket, then re-test the state with
                              * interrupts off before going to sleep.
                              */
 979                         release_sock(sk);
 980                         cli();
 981                 
 982                         if (sk->state != TCP_ESTABLISHED &&
 983                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 984                         {
 985                                 interruptible_sleep_on(sk->sleep);
 986                                 if (current->signal & ~current->blocked) 
 987                                 {
 988                                         sti();
 989                                         if (copied) 
 990                                                 return(copied);
 991                                         return(-ERESTARTSYS);
 992                                 }
 993                         }
 994                         sk->inuse = 1;
 995                         sti();
 996                 }
 997 
 998         /*
 999          * The following code can result in copy <= 0 if sk->mss is ever
1000          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1001          * sk->mtu is constant once SYN processing is finished.  I.e. we
1002          * had better not get here until we've seen his SYN and at least one
1003          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1004          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1005          * non-decreasing.  Note that any ioctl to set user_mss must be done
1006          * before the exchange of SYN's.  If the initial ack from the other
1007          * end has a window of 0, max_window and thus mss will both be 0.
1008          */
1009 
1010         /* 
1011          *      Now we need to check if we have a half built packet. 
1012          */
1013 
1014                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1015                 {
1016                         int hdrlen;
1017 
1018                          /* IP header + TCP header */
1019                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1020                                  + sizeof(struct tcphdr);
1021         
1022                         /* Add more stuff to the end of skb->len */
1023                         if (!(flags & MSG_OOB)) 
1024                         {
1025                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1026                                 /* FIXME: this is really a bug. */
1027                                 if (copy <= 0) 
1028                                 {
1029                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1030                                         copy = 0;
1031                                 }
1032           
1033                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1034                                 skb->len += copy;
1035                                 from += copy;
1036                                 copied += copy;
1037                                 len -= copy;
1038                                 sk->write_seq += copy;
1039                         }
                             /*
                              * Send now if the payload has reached mss, the write is
                              * urgent, or no packets are outstanding; otherwise
                              * park the packet again.
                              */
1040                         if ((skb->len - hdrlen) >= sk->mss ||
1041                                 (flags & MSG_OOB) || !sk->packets_out)
1042                                 tcp_send_skb(sk, skb);
1043                         else
1044                                 tcp_enqueue_partial(skb, sk);
1045                         continue;
1046                 }
1047 
1048         /*
1049          * We also need to worry about the window.
1050          * If window < 1/2 the maximum window we've seen from this
1051          *   host, don't use it.  This is sender side
1052          *   silly window prevention, as specified in RFC1122.
1053          *   (Note that this is different than earlier versions of
1054          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1055          *   use the whole MSS.  Since this results in the right
1056          *   edge of the packet being outside the window, it will
1057          *   be queued for later rather than sent.
1058          */
1059 
                     /*
                      * NOTE(review): if sk->mss is 0 (the comment above says it
                      * can be), copy is 0 here and len is not reduced by this
                      * pass -- confirm the loop still terminates in that case.
                      */
1060                 copy = sk->window_seq - sk->write_seq;
1061                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1062                         copy = sk->mss;
1063                 if (copy > len)
1064                         copy = len;
1065 
1066         /*
1067          *      We should really check the window here also. 
1068          */
1069          
                     /*
                      * A short, non-urgent segment gets a buffer sized from
                      * sk->mtu and is remembered in send_tmp so that it can be
                      * parked as a partial packet further down.
                      */
1070                 send_tmp = NULL;
1071                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1072                 {
1073                         /*
1074                          *      We will release the socket in case we sleep here. 
1075                          */
1076                         release_sock(sk);
1077                         /*
1078                          *      NB: following must be mtu, because mss can be increased.
1079                          *      mss is always <= mtu 
1080                          */
1081                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1082                         sk->inuse = 1;
1083                         send_tmp = skb;
1084                 } 
1085                 else 
1086                 {
1087                         /*
1088                          *      We will release the socket in case we sleep here. 
1089                          */
1090                         release_sock(sk);
1091                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1092                         sk->inuse = 1;
1093                 }
1094 
1095                 /*
1096                  *      If we didn't get any memory, we need to sleep. 
1097                  */
1098 
1099                 if (skb == NULL) 
1100                 {
1101                         if (nonblock /* || copied */) 
1102                         {
1103                                 release_sock(sk);
1104                                 if (copied) 
1105                                         return(copied);
1106                                 return(-EAGAIN);
1107                         }
1108 
1109                         /*
1110                          *      FIXME: here is another race condition. 
1111                          */
1112 
1113                         tmp = sk->wmem_alloc;
1114                         release_sock(sk);
1115                         cli();
1116                         /*
1117                          *      Again we will try to avoid it. 
1118                          */
                             /*
                              * Sleep only if wmem_alloc has not dropped since it was
                              * sampled above and the socket is still writable and
                              * error free.
                              */
1119                         if (tmp <= sk->wmem_alloc &&
1120                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1121                                 && sk->err == 0) 
1122                         {
1123                                 interruptible_sleep_on(sk->sleep);
1124                                 if (current->signal & ~current->blocked) 
1125                                 {
1126                                         sti();
1127                                         if (copied) 
1128                                                 return(copied);
1129                                         return(-ERESTARTSYS);
1130                                 }
1131                         }
1132                         sk->inuse = 1;
1133                         sti();
1134                         continue;
1135                 }
1136 
1137                 skb->len = 0;
1138                 skb->sk = sk;
1139                 skb->free = 0;
1140                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1141         
1142                 buff = skb->data;
1143         
1144                 /*
1145                  * FIXME: we need to optimize this.
1146                  * Perhaps some hints here would be good.
1147                  */
1148                 
1149                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1150                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1151                 if (tmp < 0 ) 
1152                 {
1153                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1154                         release_sock(sk);
1155                         if (copied) 
1156                                 return(copied);
1157                         return(tmp);
1158                 }
1159                 skb->len += tmp;
1160                 skb->dev = dev;
1161                 buff += tmp;
1162                 skb->h.th =(struct tcphdr *) buff;
                     /*
                      * The last argument is tcp_build_header()'s "push" value:
                      * it is 0, which sets PSH, when this segment takes the
                      * rest of the caller's data.
                      */
1163                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1164                 if (tmp < 0) 
1165                 {
1166                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1167                         release_sock(sk);
1168                         if (copied) 
1169                                 return(copied);
1170                         return(tmp);
1171                 }
1172 
                     /*
                      * NOTE(review): ntohs() is applied to a host-order value
                      * here; presumably it is the same swap as htons() --
                      * confirm.
                      */
1173                 if (flags & MSG_OOB) 
1174                 {
1175                         ((struct tcphdr *)buff)->urg = 1;
1176                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1177                 }
1178                 skb->len += tmp;
1179                 memcpy_fromfs(buff+tmp, from, copy);
1180 
1181                 from += copy;
1182                 copied += copy;
1183                 len -= copy;
1184                 skb->len += copy;
1185                 skb->free = 0;
1186                 sk->write_seq += copy;
1187         
                     /*
                      * A short segment is parked as the partial packet while
                      * other packets are outstanding; otherwise it goes out now.
                      */
1188                 if (send_tmp != NULL && sk->packets_out) 
1189                 {
1190                         tcp_enqueue_partial(send_tmp, sk);
1191                         continue;
1192                 }
1193                 tcp_send_skb(sk, skb);
1194         }
1195         sk->err = 0;
1196 
1197 /*
1198  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1199  *      interactive fast network servers. It's meant to be on and
1200  *      it really improves the throughput though not the echo time
1201  *      on my slow slip link - Alan
1202  */
1203 
1204 /*
1205  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1206  */
1207  
1208         if(sk->partial && ((!sk->packets_out) 
1209      /* If not nagling we can send on the before case too.. */
1210               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1211         ))
1212                 tcp_send_partial(sk);
1213 
1214         release_sock(sk);
1215         return(copied);
1216 }
1217 
1218 
1219 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1220            int len, int nonblock, unsigned flags,
1221            struct sockaddr_in *addr, int addr_len)
     /*
      *      sendto() for TCP: validate the supplied destination against
      *      the connection's peer, then hand the data to tcp_write().
      *
      *      sk        connected socket
      *      from      user buffer passed through to tcp_write()
      *      len       byte count passed through to tcp_write()
      *      nonblock  passed through to tcp_write()
      *      flags     only MSG_OOB and MSG_DONTROUTE are accepted
      *      addr      destination address; must match the peer
      *      addr_len  size of *addr as supplied by the caller
      *
      *      Returns -EINVAL for unsupported flags, a missing or short
      *      address, or an address family other than 0 or AF_INET;
      *      -ENOTCONN if the socket is not connected; -EISCONN if the port
      *      or address differs from the peer (sk->dummy_th.dest,
      *      sk->daddr); otherwise whatever tcp_write() returns.
      */
1222 {
1223         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1224                 return -EINVAL;
1225         if (!tcp_connected(sk->state))
1226                 return -ENOTCONN;
             /*
              * The cast keeps this comparison signed.  Compared against the
              * unsigned result of sizeof, a negative addr_len would be
              * converted to a huge value and slip past the length check.
              * A NULL address is rejected here too, before it is
              * dereferenced below.
              */
1227         if (addr == NULL || addr_len < (int) sizeof(*addr))
1228                 return -EINVAL;
1229         if (addr->sin_family && addr->sin_family != AF_INET) 
1230                 return -EINVAL;
1231         if (addr->sin_port != sk->dummy_th.dest) 
1232                 return -EISCONN;
1233         if (addr->sin_addr.s_addr != sk->daddr) 
1234                 return -EISCONN;
1235         return tcp_write(sk, from, len, nonblock, flags);
1236 }
1237 
1238 
1239 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      *      Send an ack right away if the socket has any pending
      *      (sk->ack_backlog non-zero); otherwise do nothing.
      *
      *      The header starts as a copy of sk->dummy_th, carries
      *      sk->sent_seq as its sequence number and sk->acked_seq as its
      *      ack number, and advertises a freshly selected window.
      *
      *      If no buffer can be allocated the socket timer is reset to
      *      TIME_WRITE so the ack is retried.  If build_header() fails the
      *      buffer is freed and the routine returns.
      */
1240 {
1241         int tmp;
1242         struct device *dev = NULL;
1243         struct tcphdr *t1;
1244         struct sk_buff *buff;
1245 
1246         if (!sk->ack_backlog) 
1247                 return;
1248 
1249         /*
1250          * FIXME: we need to put code here to prevent this routine from
1251          * being called.  Being called once in a while is ok, so only check
1252          * if this is the second time in a row.
1253          */
1254 
1255         /*
1256          * We need to grab some memory, and put together an ack,
1257          * and then put it into the queue to be sent.
1258          */
1259 
1260         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1261         if (buff == NULL) 
1262         {
1263                 /* Try again real soon. */
1264                 reset_timer(sk, TIME_WRITE, 10);
1265                 return;
1266         }
1267 
1268         buff->len = sizeof(struct tcphdr);
1269         buff->sk = sk;
1270         buff->localroute = sk->localroute;
1271         
1272         /*
1273          *      Put in the IP header and routing stuff. 
1274          */
1275 
1276         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1277                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1278         if (tmp < 0) 
1279         {
1280                 buff->free=1;
1281                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1282                 return;
1283         }
1284 
             /* The TCP header follows the tmp bytes written by build_header(). */
1285         buff->len += tmp;
1286         t1 =(struct tcphdr *)(buff->data +tmp);
1287 
1288         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1289         t1->seq = htonl(sk->sent_seq);
1290         t1->ack = 1;
1291         t1->res1 = 0;
1292         t1->res2 = 0;
1293         t1->rst = 0;
1294         t1->urg = 0;
1295         t1->syn = 0;
1296         t1->psh = 0;
             /*
              * NOTE(review): fin is not cleared here, unlike in
              * tcp_send_ack(); presumably sk->dummy_th never has it set --
              * confirm.
              */
1297         sk->ack_backlog = 0;
1298         sk->bytes_rcv = 0;
1299         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1300         t1->window = ntohs(sk->window);
1301         t1->ack_seq = ntohl(sk->acked_seq);
             /* Header length in 4-byte units; no options are added. */
1302         t1->doff = sizeof(*t1)/4;
1303         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1304         sk->prot->queue_xmit(sk, dev, buff, 1);
1305         tcp_statistics.TcpOutSegs++;
1306 }
1307 
1308 
1309 /*
1310  *      FIXME:
1311  *      This routine frees used buffers.
1312  *      It should consider sending an ACK to let the
1313  *      other end know we now have a bigger window.
      *
      *      Buffers at the head of sk->receive_queue that are marked used
      *      are unlinked and freed, stopping at the first unused one.  If
      *      the receive space reported by sk->prot->rspace() changed as a
      *      result, an ack is either sent at once or scheduled through
      *      the socket timer.
1314  */
1315 
1316 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1317 {
1318         unsigned long flags;
1319         unsigned long left;
1320         struct sk_buff *skb;
1321         unsigned long rspace;
1322 
1323         if(sk->debug)
1324                 printk("cleaning rbuf for sk=%p\n", sk);
1325   
1326         save_flags(flags);
1327         cli();
1328   
             /* Receive space before anything is freed, for comparison below. */
1329         left = sk->prot->rspace(sk);
1330  
1331         /*
1332          * We have to loop through all the buffer headers,
1333          * and try to free up all the space we can.
1334          */
1335 
1336         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1337         {
1338                 if (!skb->used) 
1339                         break;
1340                 skb_unlink(skb);
1341                 skb->sk = sk;
1342                 kfree_skb(skb, FREE_READ);
1343         }
1344 
1345         restore_flags(flags);
1346 
1347         /*
1348          * FIXME:
1349          * At this point we should send an ack if the difference
1350          * in the window, and the amount of space is bigger than
1351          * TCP_WINDOW_DIFF.
1352          */
1353 
1354         if(sk->debug)
1355                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1356                                             left);
1357         if ((rspace=sk->prot->rspace(sk)) != left) 
1358         {
1359                 /*
1360                  * This area has caused the most trouble.  The current strategy
1361                  * is to simply do nothing if the other end has room to send at
1362                  * least 3 full packets, because the ack from those will auto-
1363                  * matically update the window.  If the other end doesn't think
1364                  * we have much space left, but we have room for at least 1 more
1365                  * complete packet than it thinks we do, we will send an ack
1366                  * immediately.  Otherwise we will wait up to .5 seconds in case
1367                  * the user reads some more.
1368                  */
1369                 sk->ack_backlog++;
1370         /*
1371          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1372          * if the other end is offering a window smaller than the agreed on MSS
1373          * (called sk->mtu here).  In theory there's no connection between send
1374          * and receive, and so no reason to think that they're going to send
1375          * small packets.  For the moment I'm using the hack of reducing the mss
1376          * only on the send side, so I'm putting mtu here.
1377          */
1378 
1379                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1380                 {
1381                         /* Send an ack right now. */
1382                         tcp_read_wakeup(sk);
1383                 } 
1384                 else 
1385                 {
1386                         /* Force it to send an ack soon. */
                             /*
                              * Take the socket timer off the list.  Re-arm it as
                              * TIME_WRITE with TCP_ACK_TIME if it was idle or was
                              * due later than that; otherwise put it back
                              * unchanged.
                              */
1387                         int was_active = del_timer(&sk->timer);
1388                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1389                         {
1390                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1391                         } 
1392                         else
1393                                 add_timer(&sk->timer);
1394                 }
1395         }
1396 } 
1397 
1398 
1399 /*
1400  *      Handle reading urgent data. 
      *
      *      sk        socket to read from
      *      nonblock  when non-zero, return -EAGAIN instead of waiting
      *      to        user buffer that receives the urgent byte
      *      len       requested length; 0 is returned if it is not positive
      *      flags     with MSG_PEEK the byte is left marked as unread
      *
      *      Returns 1 after storing the urgent byte, 0 at end of stream
      *      or for a non-positive len, or a negative value: -EINVAL
      *      (urgent data is inline, absent or already read), -sk->err,
      *      -ENOTCONN, -EAGAIN or -ERESTARTSYS.
1401  */
1402  
1403 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1404              unsigned char *to, int len, unsigned flags)
1405 {
1406         struct wait_queue wait = { current, NULL };
1407 
1408         while (len > 0) 
1409         {
1410                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1411                         return -EINVAL;
                     /*
                      * A valid urgent byte is present.  sk->urg_data is narrowed
                      * to a char to extract it, and the byte is marked as read
                      * unless the caller is only peeking.
                      */
1412                 if (sk->urg_data & URG_VALID) 
1413                 {
1414                         char c = sk->urg_data;
1415                         if (!(flags & MSG_PEEK))
1416                                 sk->urg_data = URG_READ;
1417                         put_fs_byte(c, to);
1418                         return 1;
1419                 }
1420 
1421                 if (sk->err) 
1422                 {
1423                         int tmp = -sk->err;
1424                         sk->err = 0;
1425                         return tmp;
1426                 }
1427 
                     /*
                      * On a closed or finished socket the first call reports end
                      * of stream and sets sk->done; later calls get -ENOTCONN.
                      */
1428                 if (sk->state == TCP_CLOSE || sk->done) 
1429                 {
1430                         if (!sk->done) {
1431                                 sk->done = 1;
1432                                 return 0;
1433                         }
1434                         return -ENOTCONN;
1435                 }
1436 
1437                 if (sk->shutdown & RCV_SHUTDOWN) 
1438                 {
1439                         sk->done = 1;
1440                         return 0;
1441                 }
1442 
1443                 if (nonblock)
1444                         return -EAGAIN;
1445 
1446                 if (current->signal & ~current->blocked)
1447                         return -ERESTARTSYS;
1448 
                     /*
                      * Join the socket's wait queue before re-testing the
                      * condition, and only schedule() while the urgent byte is
                      * still flagged URG_NOTYET with no error or receive
                      * shutdown.
                      */
1449                 current->state = TASK_INTERRUPTIBLE;
1450                 add_wait_queue(sk->sleep, &wait);
1451                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1452                     !(sk->shutdown & RCV_SHUTDOWN))
1453                         schedule();
1454                 remove_wait_queue(sk->sleep, &wait);
1455                 current->state = TASK_RUNNING;
1456         }
1457         return 0;
1458 }
1459 
1460 
1461 /*
1462  *      This routine copies from a sock struct into the user buffer. 
1463  */
1464  
1465 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1466         int len, int nonblock, unsigned flags)
1467 {
1468         struct wait_queue wait = { current, NULL };
1469         int copied = 0;
1470         unsigned long peek_seq;
1471         unsigned long *seq;
1472         unsigned long used;
1473 
1474         /* This error should be checked. */
1475         if (sk->state == TCP_LISTEN)
1476                 return -ENOTCONN;
1477 
1478         /* Urgent data needs to be handled specially. */
1479         if (flags & MSG_OOB)
1480                 return tcp_read_urg(sk, nonblock, to, len, flags);
1481 
1482         peek_seq = sk->copied_seq;
1483         seq = &sk->copied_seq;
1484         if (flags & MSG_PEEK)
1485                 seq = &peek_seq;
1486 
1487         add_wait_queue(sk->sleep, &wait);
1488         sk->inuse = 1;
1489         while (len > 0) 
1490         {
1491                 struct sk_buff * skb;
1492                 unsigned long offset;
1493         
1494                 /*
1495                  * are we at urgent data? Stop if we have read anything.
1496                  */
1497                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1498                         break;
1499 
1500                 current->state = TASK_INTERRUPTIBLE;
1501 
1502                 skb = skb_peek(&sk->receive_queue);
1503                 do 
1504                 {
1505                         if (!skb)
1506                                 break;
1507                         if (before(1+*seq, skb->h.th->seq))
1508                                 break;
1509                         offset = 1 + *seq - skb->h.th->seq;
1510                         if (skb->h.th->syn)
1511                                 offset--;
1512                         if (offset < skb->len)
1513                                 goto found_ok_skb;
1514                         if (!(flags & MSG_PEEK))
1515                                 skb->used = 1;
1516                         skb = skb->next;
1517                 }
1518                 while (skb != (struct sk_buff *)&sk->receive_queue);
1519 
1520                 if (copied)
1521                         break;
1522 
1523                 if (sk->err) 
1524                 {
1525                         copied = -sk->err;
1526                         sk->err = 0;
1527                         break;
1528                 }
1529 
1530                 if (sk->state == TCP_CLOSE) 
1531                 {
1532                         if (!sk->done) 
1533                         {
1534                                 sk->done = 1;
1535                                 break;
1536                         }
1537                         copied = -ENOTCONN;
1538                         break;
1539                 }
1540 
1541                 if (sk->shutdown & RCV_SHUTDOWN) 
1542                 {
1543                         sk->done = 1;
1544                         break;
1545                 }
1546                         
1547                 if (nonblock) 
1548                 {
1549                         copied = -EAGAIN;
1550                         break;
1551                 }
1552 
1553                 cleanup_rbuf(sk);
1554                 release_sock(sk);
1555                 schedule();
1556                 sk->inuse = 1;
1557 
1558                 if (current->signal & ~current->blocked) 
1559                 {
1560                         copied = -ERESTARTSYS;
1561                         break;
1562                 }
1563                 continue;
1564 
1565         found_ok_skb:
1566                 /* Ok so how much can we use ? */
1567                 used = skb->len - offset;
1568                 if (len < used)
1569                         used = len;
1570                 /* do we have urgent data here? */
1571                 if (sk->urg_data) 
1572                 {
1573                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1574                         if (urg_offset < used) 
1575                         {
1576                                 if (!urg_offset) 
1577                                 {
1578                                         if (!sk->urginline) 
1579                                         {
1580                                                 ++*seq;
1581                                                 offset++;
1582                                                 used--;
1583                                         }
1584                                 }
1585                                 else
1586                                         used = urg_offset;
1587                         }
1588                 }
1589                 /* Copy it */
1590                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1591                         skb->h.th->doff*4 + offset, used);
1592                 copied += used;
1593                 len -= used;
1594                 to += used;
1595                 *seq += used;
1596                 if (after(sk->copied_seq+1,sk->urg_seq))
1597                         sk->urg_data = 0;
1598                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1599                         skb->used = 1;
1600         }
1601         remove_wait_queue(sk->sleep, &wait);
1602         current->state = TASK_RUNNING;
1603 
1604         /* Clean up data we have read: This will do ACK frames */
1605         cleanup_rbuf(sk);
1606         release_sock(sk);
1607         return copied;
1608 }
1609 
1610  
1611 /*
1612  *      Shutdown the sending side of a connection.
1613  */
1614 
1615 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1616 {
1617         struct sk_buff *buff;
1618         struct tcphdr *t1, *th;
1619         struct proto *prot;
1620         int tmp;
1621         struct device *dev = NULL;
1622 
1623         /*
1624          * We need to grab some memory, and put together a FIN,
1625          * and then put it into the queue to be sent.
1626          * FIXME:
1627          *
1628          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1629          *      Most of this is guesswork, so maybe it will work...
1630          */
1631 
1632         if (!(how & SEND_SHUTDOWN)) 
1633                 return;
1634          
1635         /*
1636          *      If we've already sent a FIN, return. 
1637          */
1638          
1639         if (sk->state == TCP_FIN_WAIT1 ||
1640             sk->state == TCP_FIN_WAIT2 ||
1641             sk->state == TCP_CLOSING ||
1642             sk->state == TCP_LAST_ACK ||
1643             sk->state == TCP_TIME_WAIT
1644         ) 
1645         {
1646                 return;
1647         }
1648         sk->inuse = 1;
1649 
1650         /*
1651          * flag that the sender has shutdown
1652          */
1653 
1654         sk->shutdown |= SEND_SHUTDOWN;
1655 
1656         /*
1657          *  Clear out any half completed packets. 
1658          */
1659 
1660         if (sk->partial)
1661                 tcp_send_partial(sk);
1662 
1663         prot =(struct proto *)sk->prot;
1664         th =(struct tcphdr *)&sk->dummy_th;
1665         release_sock(sk); /* incase the malloc sleeps. */
1666         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1667         if (buff == NULL)
1668                 return;
1669         sk->inuse = 1;
1670 
1671         buff->sk = sk;
1672         buff->len = sizeof(*t1);
1673         buff->localroute = sk->localroute;
1674         t1 =(struct tcphdr *) buff->data;
1675 
1676         /*
1677          *      Put in the IP header and routing stuff. 
1678          */
1679 
1680         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1681                            IPPROTO_TCP, sk->opt,
1682                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1683         if (tmp < 0) 
1684         {
1685                 /*
1686                  *      Finish anyway, treat this as a send that got lost. 
1687                  *
1688                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1689                  *      written data to be completely acknowledged along
1690                  *      with an acknowledge to our FIN.
1691                  *
1692                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1693                  *      connection established.
1694                  */
1695                 buff->free=1;
1696                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1697 
1698                 if (sk->state == TCP_ESTABLISHED)
1699                         tcp_set_state(sk,TCP_FIN_WAIT1);
1700                 else if(sk->state == TCP_CLOSE_WAIT)
1701                         tcp_set_state(sk,TCP_LAST_ACK);
1702                 else
1703                         tcp_set_state(sk,TCP_FIN_WAIT2);
1704 
1705                 release_sock(sk);
1706                 return;
1707         }
1708 
1709         t1 =(struct tcphdr *)((char *)t1 +tmp);
1710         buff->len += tmp;
1711         buff->dev = dev;
1712         memcpy(t1, th, sizeof(*t1));
1713         t1->seq = ntohl(sk->write_seq);
1714         sk->write_seq++;
1715         buff->h.seq = sk->write_seq;
1716         t1->ack = 1;
1717         t1->ack_seq = ntohl(sk->acked_seq);
1718         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1719         t1->fin = 1;
1720         t1->rst = 0;
1721         t1->doff = sizeof(*t1)/4;
1722         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1723 
1724         /*
1725          * If there is data in the write queue, the fin must be appended to
1726          * the write queue.
1727          */
1728         
1729         if (skb_peek(&sk->write_queue) != NULL) 
1730         {
1731                 buff->free=0;
1732                 if (buff->next != NULL) 
1733                 {
1734                         printk("tcp_shutdown: next != NULL\n");
1735                         skb_unlink(buff);
1736                 }
1737                 skb_queue_tail(&sk->write_queue, buff);
1738         } 
1739         else 
1740         {
1741                 sk->sent_seq = sk->write_seq;
1742                 sk->prot->queue_xmit(sk, dev, buff, 0);
1743         }
1744 
1745         if (sk->state == TCP_ESTABLISHED) 
1746                 tcp_set_state(sk,TCP_FIN_WAIT1);
1747         else if (sk->state == TCP_CLOSE_WAIT)
1748                 tcp_set_state(sk,TCP_LAST_ACK);
1749         else
1750                 tcp_set_state(sk,TCP_FIN_WAIT2);
1751 
1752         release_sock(sk);
1753 }
1754 
1755 
1756 static int
1757 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1758              int to_len, int nonblock, unsigned flags,
1759              struct sockaddr_in *addr, int *addr_len)
1760 {
1761         int result;
1762   
1763         /* 
1764          *      Have to check these first unlike the old code. If 
1765          *      we check them after we lose data on an error
1766          *      which is wrong 
1767          */
1768 
1769         if(addr_len)
1770                 *addr_len = sizeof(*addr);
1771         result=tcp_read(sk, to, to_len, nonblock, flags);
1772 
1773         if (result < 0) 
1774                 return(result);
1775   
1776         if(addr)
1777         {
1778                 addr->sin_family = AF_INET;
1779                 addr->sin_port = sk->dummy_th.dest;
1780                 addr->sin_addr.s_addr = sk->daddr;
1781         }
1782         return(result);
1783 }
1784 
1785 
1786 /*
1787  *      This routine will send an RST to the other tcp. 
1788  */
1789  
1790 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1791           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1792 {
1793         struct sk_buff *buff;
1794         struct tcphdr *t1;
1795         int tmp;
1796         struct device *ndev=NULL;
1797   
1798 /*
1799  * We need to grab some memory, and put together an RST,
1800  * and then put it into the queue to be sent.
1801  */
1802 
1803         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1804         if (buff == NULL) 
1805                 return;
1806 
1807         buff->len = sizeof(*t1);
1808         buff->sk = NULL;
1809         buff->dev = dev;
1810         buff->localroute = 0;
1811 
1812         t1 =(struct tcphdr *) buff->data;
1813 
1814         /*
1815          *      Put in the IP header and routing stuff. 
1816          */
1817 
1818         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1819                            sizeof(struct tcphdr),tos,ttl);
1820         if (tmp < 0) 
1821         {
1822                 buff->free = 1;
1823                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1824                 return;
1825         }
1826 
1827         t1 =(struct tcphdr *)((char *)t1 +tmp);
1828         buff->len += tmp;
1829         memcpy(t1, th, sizeof(*t1));
1830 
1831         /*
1832          *      Swap the send and the receive. 
1833          */
1834 
1835         t1->dest = th->source;
1836         t1->source = th->dest;
1837         t1->rst = 1;  
1838         t1->window = 0;
1839   
1840         if(th->ack)
1841         {
1842                 t1->ack = 0;
1843                 t1->seq = th->ack_seq;
1844                 t1->ack_seq = 0;
1845         }
1846         else
1847         {
1848                 t1->ack = 1;
1849                 if(!th->syn)
1850                         t1->ack_seq=htonl(th->seq);
1851                 else
1852                         t1->ack_seq=htonl(th->seq+1);
1853                 t1->seq=0;
1854         }
1855 
1856         t1->syn = 0;
1857         t1->urg = 0;
1858         t1->fin = 0;
1859         t1->psh = 0;
1860         t1->doff = sizeof(*t1)/4;
1861         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1862         prot->queue_xmit(NULL, dev, buff, 1);
1863         tcp_statistics.TcpOutSegs++;
1864 }
1865 
1866 
1867 /*
1868  *      Look for tcp options. Parses everything but only knows about MSS.
1869  *      This routine is always called with the packet containing the SYN.
1870  *      However it may also be called with the ack to the SYN.  So you
1871  *      can't assume this is always the SYN.  It's always called after
1872  *      we have set up sk->mtu to our own MTU.
1873  */
1874  
1875 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1876 {
1877         unsigned char *ptr;
1878         int length=(th->doff*4)-sizeof(struct tcphdr);
1879         int mss_seen = 0;
1880     
1881         ptr = (unsigned char *)(th + 1);
1882   
1883         while(length>0)
1884         {
1885                 int opcode=*ptr++;
1886                 int opsize=*ptr++;
1887                 switch(opcode)
1888                 {
1889                         case TCPOPT_EOL:
1890                                 return;
1891                         case TCPOPT_NOP:
1892                                 length-=2;
1893                                 continue;
1894                         
1895                         default:
1896                                 if(opsize<=2)   /* Avoid silly options looping forever */
1897                                         return;
1898                                 switch(opcode)
1899                                 {
1900                                         case TCPOPT_MSS:
1901                                                 if(opsize==4 && th->syn)
1902                                                 {
1903                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1904                                                         mss_seen = 1;
1905                                                 }
1906                                                 break;
1907                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1908                                 }
1909                                 ptr+=opsize-2;
1910                                 length-=opsize;
1911                 }
1912         }
1913         if (th->syn) 
1914         {
1915                 if (! mss_seen)
1916                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1917         }
1918 #ifdef CONFIG_INET_PCTCP
1919         sk->mss = min(sk->max_window >> 1, sk->mtu);
1920 #else    
1921         sk->mss = min(sk->max_window, sk->mtu);
1922 #endif  
1923 }
1924 
/*
 *	Classful network mask for an address.
 *
 *	dst is in network byte order, and so is the mask handed back.
 *	Class A and class B addresses get their own masks; everything
 *	else is given the class C mask.
 */

static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_addr = ntohl(dst);
	unsigned long net_mask = IN_CLASSC_NET;

	if (IN_CLASSA(host_addr))
		net_mask = IN_CLASSA_NET;
	else if (IN_CLASSB(host_addr))
		net_mask = IN_CLASSB_NET;

	return htonl(net_mask);
}
1934 
1935 /*
1936  *      This routine handles a connection request.
1937  *      It should make sure we haven't already responded.
1938  *      Because of the way BSD works, we have to send a syn/ack now.
1939  *      This also means it will be harder to close a socket which is
1940  *      listening.
1941  */
1942  
1943 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1944                  unsigned long daddr, unsigned long saddr,
1945                  struct options *opt, struct device *dev)
1946 {
1947         struct sk_buff *buff;
1948         struct tcphdr *t1;
1949         unsigned char *ptr;
1950         struct sock *newsk;
1951         struct tcphdr *th;
1952         struct device *ndev=NULL;
1953         int tmp;
1954         struct rtable *rt;
1955   
1956         th = skb->h.th;
1957 
1958         /* If the socket is dead, don't accept the connection. */
1959         if (!sk->dead) 
1960         {
1961                 sk->data_ready(sk,0);
1962         }
1963         else 
1964         {
1965                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1966                 tcp_statistics.TcpAttemptFails++;
1967                 kfree_skb(skb, FREE_READ);
1968                 return;
1969         }
1970 
1971         /*
1972          * Make sure we can accept more.  This will prevent a
1973          * flurry of syns from eating up all our memory.
1974          */
1975 
1976         if (sk->ack_backlog >= sk->max_ack_backlog) 
1977         {
1978                 tcp_statistics.TcpAttemptFails++;
1979                 kfree_skb(skb, FREE_READ);
1980                 return;
1981         }
1982 
1983         /*
1984          * We need to build a new sock struct.
1985          * It is sort of bad to have a socket without an inode attached
1986          * to it, but the wake_up's will just wake up the listening socket,
1987          * and if the listening socket is destroyed before this is taken
1988          * off of the queue, this will take care of it.
1989          */
1990 
1991         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1992         if (newsk == NULL) 
1993         {
1994                 /* just ignore the syn.  It will get retransmitted. */
1995                 tcp_statistics.TcpAttemptFails++;
1996                 kfree_skb(skb, FREE_READ);
1997                 return;
1998         }
1999 
2000         memcpy(newsk, sk, sizeof(*newsk));
2001         skb_queue_head_init(&newsk->write_queue);
2002         skb_queue_head_init(&newsk->receive_queue);
2003         newsk->send_head = NULL;
2004         newsk->send_tail = NULL;
2005         skb_queue_head_init(&newsk->back_log);
2006         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2007         newsk->rto = TCP_TIMEOUT_INIT;
2008         newsk->mdev = 0;
2009         newsk->max_window = 0;
2010         newsk->cong_window = 1;
2011         newsk->cong_count = 0;
2012         newsk->ssthresh = 0;
2013         newsk->backoff = 0;
2014         newsk->blog = 0;
2015         newsk->intr = 0;
2016         newsk->proc = 0;
2017         newsk->done = 0;
2018         newsk->partial = NULL;
2019         newsk->pair = NULL;
2020         newsk->wmem_alloc = 0;
2021         newsk->rmem_alloc = 0;
2022         newsk->localroute = sk->localroute;
2023 
2024         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2025 
2026         newsk->err = 0;
2027         newsk->shutdown = 0;
2028         newsk->ack_backlog = 0;
2029         newsk->acked_seq = skb->h.th->seq+1;
2030         newsk->fin_seq = skb->h.th->seq;
2031         newsk->copied_seq = skb->h.th->seq;
2032         newsk->state = TCP_SYN_RECV;
2033         newsk->timeout = 0;
2034         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
2035         newsk->window_seq = newsk->write_seq;
2036         newsk->rcv_ack_seq = newsk->write_seq;
2037         newsk->urg_data = 0;
2038         newsk->retransmits = 0;
2039         newsk->destroy = 0;
2040         init_timer(&newsk->timer);
2041         newsk->timer.data = (unsigned long)newsk;
2042         newsk->timer.function = &net_timer;
2043         newsk->dummy_th.source = skb->h.th->dest;
2044         newsk->dummy_th.dest = skb->h.th->source;
2045         
2046         /*
2047          *      Swap these two, they are from our point of view. 
2048          */
2049          
2050         newsk->daddr = saddr;
2051         newsk->saddr = daddr;
2052 
2053         put_sock(newsk->num,newsk);
2054         newsk->dummy_th.res1 = 0;
2055         newsk->dummy_th.doff = 6;
2056         newsk->dummy_th.fin = 0;
2057         newsk->dummy_th.syn = 0;
2058         newsk->dummy_th.rst = 0;        
2059         newsk->dummy_th.psh = 0;
2060         newsk->dummy_th.ack = 0;
2061         newsk->dummy_th.urg = 0;
2062         newsk->dummy_th.res2 = 0;
2063         newsk->acked_seq = skb->h.th->seq + 1;
2064         newsk->copied_seq = skb->h.th->seq;
2065 
2066         /*
2067          *      Grab the ttl and tos values and use them 
2068          */
2069 
2070         newsk->ip_ttl=sk->ip_ttl;
2071         newsk->ip_tos=skb->ip_hdr->tos;
2072 
2073         /*
2074          *      Use 512 or whatever user asked for 
2075          */
2076 
2077         /*
2078          *      Note use of sk->user_mss, since user has no direct access to newsk 
2079          */
2080 
2081         rt=ip_rt_route(saddr, NULL,NULL);
2082         
2083         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2084                 newsk->window_clamp = rt->rt_window;
2085         else
2086                 newsk->window_clamp = 0;
2087                 
2088         if (sk->user_mss)
2089                 newsk->mtu = sk->user_mss;
2090         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2091                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2092         else 
2093         {
2094 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2095                 if ((saddr ^ daddr) & default_mask(saddr))
2096 #else
2097                 if ((saddr ^ daddr) & dev->pa_mask)
2098 #endif
2099                         newsk->mtu = 576 - HEADER_SIZE;
2100                 else
2101                         newsk->mtu = MAX_WINDOW;
2102         }
2103 
2104         /*
2105          *      But not bigger than device MTU 
2106          */
2107 
2108         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2109 
2110         /*
2111          *      This will min with what arrived in the packet 
2112          */
2113 
2114         tcp_options(newsk,skb->h.th);
2115 
2116         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2117         if (buff == NULL) 
2118         {
2119                 sk->err = -ENOMEM;
2120                 newsk->dead = 1;
2121                 release_sock(newsk);
2122                 kfree_skb(skb, FREE_READ);
2123                 tcp_statistics.TcpAttemptFails++;
2124                 return;
2125         }
2126   
2127         buff->len = sizeof(struct tcphdr)+4;
2128         buff->sk = newsk;
2129         buff->localroute = newsk->localroute;
2130 
2131         t1 =(struct tcphdr *) buff->data;
2132 
2133         /*
2134          *      Put in the IP header and routing stuff. 
2135          */
2136 
2137         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2138                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2139 
2140         /*
2141          *      Something went wrong. 
2142          */
2143 
2144         if (tmp < 0) 
2145         {
2146                 sk->err = tmp;
2147                 buff->free=1;
2148                 kfree_skb(buff,FREE_WRITE);
2149                 newsk->dead = 1;
2150                 release_sock(newsk);
2151                 skb->sk = sk;
2152                 kfree_skb(skb, FREE_READ);
2153                 tcp_statistics.TcpAttemptFails++;
2154                 return;
2155         }
2156 
2157         buff->len += tmp;
2158         t1 =(struct tcphdr *)((char *)t1 +tmp);
2159   
2160         memcpy(t1, skb->h.th, sizeof(*t1));
2161         buff->h.seq = newsk->write_seq;
2162         /*
2163          *      Swap the send and the receive. 
2164          */
2165         t1->dest = skb->h.th->source;
2166         t1->source = newsk->dummy_th.source;
2167         t1->seq = ntohl(newsk->write_seq++);
2168         t1->ack = 1;
2169         newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2170         newsk->sent_seq = newsk->write_seq;
2171         t1->window = ntohs(newsk->window);
2172         t1->res1 = 0;
2173         t1->res2 = 0;
2174         t1->rst = 0;
2175         t1->urg = 0;
2176         t1->psh = 0;
2177         t1->syn = 1;
2178         t1->ack_seq = ntohl(skb->h.th->seq+1);
2179         t1->doff = sizeof(*t1)/4+1;
2180         ptr =(unsigned char *)(t1+1);
2181         ptr[0] = 2;
2182         ptr[1] = 4;
2183         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2184         ptr[3] =(newsk->mtu) & 0xff;
2185 
2186         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2187         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2188 
2189         reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
2190         skb->sk = newsk;
2191 
2192         /*
2193          *      Charge the sock_buff to newsk. 
2194          */
2195          
2196         sk->rmem_alloc -= skb->mem_len;
2197         newsk->rmem_alloc += skb->mem_len;
2198         
2199         skb_queue_tail(&sk->receive_queue,skb);
2200         sk->ack_backlog++;
2201         release_sock(newsk);
2202         tcp_statistics.TcpOutSegs++;
2203 }
2204 
2205 
2206 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2207 {
2208         struct sk_buff *buff;
2209         int need_reset = 0;
2210         struct tcphdr *t1, *th;
2211         struct proto *prot;
2212         struct device *dev=NULL;
2213         int tmp;
2214 
2215         /*
2216          * We need to grab some memory, and put together a FIN, 
2217          * and then put it into the queue to be sent.
2218          */
2219         sk->inuse = 1;
2220         sk->keepopen = 1;
2221         sk->shutdown = SHUTDOWN_MASK;
2222 
2223         if (!sk->dead) 
2224                 sk->state_change(sk);
2225 
2226         if (timeout == 0) 
2227         {
2228                 /*
2229                  *  We need to flush the recv. buffs.  We do this only on the
2230                  *  descriptor close, not protocol-sourced closes, because the
2231                  *  reader process may not have drained the data yet!
2232                  */
2233 
2234                 if (skb_peek(&sk->receive_queue) != NULL) 
2235                 {
2236                         struct sk_buff *skb;
2237                         if(sk->debug)
2238                                 printk("Clean rcv queue\n");
2239                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2240                         {
2241                                 /* The +1 is not needed because the FIN takes up seq
2242                                    is not read!!! */
2243                                 if(skb->len > 0 && after(skb->h.th->seq + skb->len , sk->copied_seq))
2244                                         need_reset = 0;
2245                                 kfree_skb(skb, FREE_READ);
2246                         }
2247                         if(sk->debug)
2248                                 printk("Cleaned.\n");
2249                 }
2250         }
2251 
2252         /*
2253          *      Get rid off any half-completed packets. 
2254          */
2255          
2256         if (sk->partial) 
2257         {
2258                 tcp_send_partial(sk);
2259         }
2260 
2261         switch(sk->state) 
2262         {
2263                 case TCP_FIN_WAIT1:
2264                 case TCP_FIN_WAIT2:
2265                 case TCP_CLOSING:
2266                         /*
2267                          * These states occur when we have already closed out
2268                          * our end.  If there is no timeout, we do not do
2269                          * anything.  We may still be in the middle of sending
2270                          * the remainder of our buffer, for example...
2271                          * resetting the timer would be inappropriate.
2272                          *
2273                          * XXX if retransmit count reaches limit, is tcp_close()
2274                          * called with timeout == 1 ? if not, we need to fix that.
2275                          */
2276                         if (!timeout) {
2277                                 int timer_active;
2278 
2279                                 timer_active = del_timer(&sk->timer);
2280                                 if (timer_active)
2281                                         add_timer(&sk->timer);
2282                                 else
2283                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2284                         }
2285 #ifdef NOTDEF
2286                         /* 
2287                          *      Start a timer.
2288                          * original code was 4 * sk->rtt.  In converting to the
2289                          * new rtt representation, we can't quite use that.
2290                          * it seems to make most sense to  use the backed off value
2291                          */
2292                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2293 #endif
2294                         if (timeout) 
2295                                 tcp_time_wait(sk);
2296                         release_sock(sk);
2297                         return; /* break causes a double release - messy */
2298                 case TCP_TIME_WAIT:
2299                 case TCP_LAST_ACK:
2300                         /*
2301                          * A timeout from these states terminates the TCB.
2302                          */
2303                         if (timeout) 
2304                         {
2305                                 tcp_set_state(sk,TCP_CLOSE);
2306                         }
2307                         release_sock(sk);
2308                         return;
2309                 case TCP_LISTEN:
2310                         tcp_set_state(sk,TCP_CLOSE);
2311                         release_sock(sk);
2312                         return;
2313                 case TCP_CLOSE:
2314                         release_sock(sk);
2315                         return;
2316                 case TCP_CLOSE_WAIT:
2317                 case TCP_ESTABLISHED:
2318                 case TCP_SYN_SENT:
2319                 case TCP_SYN_RECV:
2320                         prot =(struct proto *)sk->prot;
2321                         th =(struct tcphdr *)&sk->dummy_th;
2322                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2323                         if (buff == NULL) 
2324                         {
2325                                 /* This will force it to try again later. */
2326                                 /* Or it would have if someone released the socket
2327                                    first. Anyway it might work now */
2328                                 release_sock(sk);
2329                                 if (sk->state != TCP_CLOSE_WAIT)
2330                                         tcp_set_state(sk,TCP_ESTABLISHED);
2331                                 reset_timer(sk, TIME_CLOSE, 100);
2332                                 return;
2333                         }
2334                         buff->sk = sk;
2335                         buff->free = 1;
2336                         buff->len = sizeof(*t1);
2337                         buff->localroute = sk->localroute;
2338                         t1 =(struct tcphdr *) buff->data;
2339         
2340                         /*
2341                          *      Put in the IP header and routing stuff. 
2342                          */
2343                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2344                                          IPPROTO_TCP, sk->opt,
2345                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2346                         if (tmp < 0) 
2347                         {
2348                                 sk->write_seq++;        /* Very important 8) */
2349                                 kfree_skb(buff,FREE_WRITE);
2350 
2351                                 /*
2352                                  * Enter FIN_WAIT1 to await completion of
2353                                  * written out data and ACK to our FIN.
2354                                  */
2355 
2356                                 if(sk->state==TCP_ESTABLISHED)
2357                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2358                                 else
2359                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2360                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2361                                 if(timeout)
2362                                         tcp_time_wait(sk);
2363 
2364                                 release_sock(sk);
2365                                 return;
2366                         }
2367 
2368                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2369                         buff->len += tmp;
2370                         buff->dev = dev;
2371                         memcpy(t1, th, sizeof(*t1));
2372                         t1->seq = ntohl(sk->write_seq);
2373                         sk->write_seq++;
2374                         buff->h.seq = sk->write_seq;
2375                         t1->ack = 1;
2376         
2377                         /* 
2378                          *      Ack everything immediately from now on. 
2379                          */
2380 
2381                         sk->delay_acks = 0;
2382                         t1->ack_seq = ntohl(sk->acked_seq);
2383                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2384                         t1->fin = 1;
2385                         t1->rst = need_reset;
2386                         t1->doff = sizeof(*t1)/4;
2387                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2388 
2389                         tcp_statistics.TcpOutSegs++;
2390         
2391                         if (skb_peek(&sk->write_queue) == NULL) 
2392                         {
2393                                 sk->sent_seq = sk->write_seq;
2394                                 prot->queue_xmit(sk, dev, buff, 0);
2395                         } 
2396                         else 
2397                         {
2398                                 reset_timer(sk, TIME_WRITE, sk->rto);
2399                                 if (buff->next != NULL) 
2400                                 {
2401                                         printk("tcp_close: next != NULL\n");
2402                                         skb_unlink(buff);
2403                                 }
2404                                 skb_queue_tail(&sk->write_queue, buff);
2405                         }
2406 
2407                         /*
2408                          * If established (normal close), enter FIN_WAIT1.
2409                          * If in CLOSE_WAIT, enter LAST_ACK
2410                          * If in CLOSING, remain in CLOSING
2411                          * otherwise enter FIN_WAIT2
2412                          */
2413 
2414                         if (sk->state == TCP_ESTABLISHED)
2415                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2416                         else if (sk->state == TCP_CLOSE_WAIT)
2417                                 tcp_set_state(sk,TCP_LAST_ACK);
2418                         else if (sk->state != TCP_CLOSING)
2419                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2420         }
2421         release_sock(sk);
2422 }
2423 
2424 
2425 /*
2426  * This routine takes stuff off of the write queue,
2427  * and puts it in the xmit queue.
2428  */
2429 static void
2430 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2431 {
2432         struct sk_buff *skb;
2433 
2434         /*
2435          *      The bytes will have to remain here. In time closedown will
2436          *      empty the write queue and all will be happy 
2437          */
2438 
2439         if(sk->zapped)
2440                 return;
2441 
2442         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2443                 before(skb->h.seq, sk->window_seq + 1) &&
2444                 (sk->retransmits == 0 ||
2445                  sk->timeout != TIME_WRITE ||
2446                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2447                 && sk->packets_out < sk->cong_window) 
2448         {
2449                 IS_SKB(skb);
2450                 skb_unlink(skb);
2451                 /* See if we really need to send the packet. */
2452                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2453                 {
2454                         sk->retransmits = 0;
2455                         kfree_skb(skb, FREE_WRITE);
2456                         if (!sk->dead) 
2457                                 sk->write_space(sk);
2458                 } 
2459                 else
2460                 {
2461                         struct tcphdr *th;
2462                         struct iphdr *iph;
2463                         int size;
2464 /*
2465  * put in the ack seq and window at this point rather than earlier,
2466  * in order to keep them monotonic.  We really want to avoid taking
2467  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2468  * Ack and window will in general have changed since this packet was put
2469  * on the write queue.
2470  */
2471                         iph = (struct iphdr *)(skb->data +
2472                                                skb->dev->hard_header_len);
2473                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2474                         size = skb->len - (((unsigned char *) th) - skb->data);
2475                         
2476                         th->ack_seq = ntohl(sk->acked_seq);
2477                         th->window = ntohs(tcp_select_window(sk));
2478 
2479                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2480 
2481                         sk->sent_seq = skb->h.seq;
2482                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2483                 }
2484         }
2485 }
2486 
2487 
2488 /*
2489  *      This routine sorts the send list, and resets the
2490  *      sk->send_head and sk->send_tail pointers.
2491  */
2492 
2493 static void sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2494 {
2495         struct sk_buff *list = NULL;
2496         struct sk_buff *skb,*skb2,*skb3;
2497 
2498         for (skb = sk->send_head; skb != NULL; skb = skb2) 
2499         {
2500                 skb2 = skb->link3;
2501                 if (list == NULL || before (skb2->h.seq, list->h.seq)) 
2502                 {
2503                         skb->link3 = list;
2504                         sk->send_tail = skb;
2505                         list = skb;
2506                 }
2507                 else
2508                 {
2509                         for (skb3 = list; ; skb3 = skb3->link3) 
2510                         {
2511                                 if (skb3->link3 == NULL ||
2512                                     before(skb->h.seq, skb3->link3->h.seq))
2513                                 {
2514                                         skb->link3 = skb3->link3;
2515                                         skb3->link3 = skb;
2516                                         if (skb->link3 == NULL) 
2517                                                 sk->send_tail = skb;
2518                                         break;
2519                                 }
2520                         }
2521                 }
2522         }
2523         sk->send_head = list;
2524 }
2525   
2526 
2527 /*
2528  *      This routine deals with incoming acks, but not outgoing ones.
      *
      *      sk    - socket the segment arrived on
      *      th    - TCP header of the incoming segment
      *      saddr - source address of the segment (not referenced in this routine)
      *      len   - segment length as passed by the caller; compared with
      *              th->doff*4 to see whether the segment is more than a header
      *
      *      In order the routine: records a larger peer window (and derives mss
      *      from it), rejects acks outside the [rcv_ack_seq, sent_seq] range,
      *      copes with a shrunken window by moving buffers back to the write
      *      queue, opens the congestion window, frees acknowledged buffers from
      *      the retransmit list while updating the rtt estimate, pushes out
      *      more queued data or re-arms the timer, advances the closing states
      *      (LAST_ACK, FIN_WAIT1, CLOSING) and finally decides whether to
      *      retransmit.
      *
      *      Returns 1 in every case but one: 0 is returned when the ack is for
      *      data beyond sk->sent_seq, or when an ack older than sk->rcv_ack_seq
      *      arrives in a state other than TCP_ESTABLISHED or TCP_CLOSE_WAIT.
2529  */
2530 
2531 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2532 {
2533         unsigned long ack;
2534         int flag = 0;
2535 
2536         /* 
2537          * 1 - there was data in packet as well as ack or new data is sent or 
2538          *     in shutdown state
2539          * 2 - data from retransmit queue was acked and removed
2540          * 4 - window shrunk or data from retransmit queue was acked and removed
2541          */
2542 
2543         if(sk->zapped)
2544                 return(1);      /* Dead, cant ack any more so why bother */
2545 
2546         ack = ntohl(th->ack_seq);
             /* Track the largest window the peer has ever offered; mss is derived from it. */
2547         if (ntohs(th->window) > sk->max_window) 
2548         {
2549                 sk->max_window = ntohs(th->window);
2550 #ifdef CONFIG_INET_PCTCP
2551                 sk->mss = min(sk->max_window>>1, sk->mtu);
2552 #else
2553                 sk->mss = min(sk->max_window, sk->mtu);
2554 #endif  
2555         }
2556 
2557         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2558                 sk->retransmits = 0;
2559 
             /*
              * The ack lies outside [rcv_ack_seq, sent_seq]: either it covers
              * something beyond sent_seq or it is older than the last ack
              * already processed.
              */
2560         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2561         {
2562                 if(sk->debug)
2563                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2564                         
2565                 /*
2566                  *      Keepalive processing.
2567                  */
2568                  
2569                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2570                 {
2571                         return(0);
2572                 }
2573                 if (sk->keepopen) 
2574                 {
2575                         if(sk->timeout==TIME_KEEPOPEN)
2576                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2577                 }
2578                 return(1);
2579         }
2580 
             /* len differs from the header length: the segment is more than a bare header. */
2581         if (len != th->doff*4) 
2582                 flag |= 1;
2583 
2584         /* See if our window has been shrunk. */
2585 
2586         if (after(sk->window_seq, ack+ntohs(th->window))) 
2587         {
2588                 /*
2589                  * We may need to move packets from the send queue
2590                  * to the write queue, if the window has been shrunk on us.
2591                  * The RFC says you are not allowed to shrink your window
2592                  * like this, but if the other end does, you must be able
2593                  * to deal with it.
2594                  */
2595                 struct sk_buff *skb;
2596                 struct sk_buff *skb2;
2597                 struct sk_buff *wskb = NULL;
2598         
                     /* Detach the whole send list; it is rebuilt entry by entry below. */
2599                 skb2 = sk->send_head;
2600                 sk->send_head = NULL;
2601                 sk->send_tail = NULL;
2602         
2603                 flag |= 4;
2604         
2605                 sk->window_seq = ack + ntohs(th->window);
2606                 cli();
2607                 while (skb2 != NULL) 
2608                 {
2609                         skb = skb2;
2610                         skb2 = skb->link3;
2611                         skb->link3 = NULL;
2612                         if (after(skb->h.seq, sk->window_seq)) 
2613                         {
2614                                 if (sk->packets_out > 0) 
2615                                         sk->packets_out--;
2616                                 /* We may need to remove this from the dev send list. */
2617                                 if (skb->next != NULL) 
2618                                 {
2619                                         skb_unlink(skb);                                
2620                                 }
2621                                 /* Now add it to the write_queue. */
                                     /* wskb is the last buffer moved, so the moved buffers keep their order. */
2622                                 if (wskb == NULL)
2623                                         skb_queue_head(&sk->write_queue,skb);
2624                                 else
2625                                         skb_append(wskb,skb);
2626                                 wskb = skb;
2627                         } 
2628                         else 
2629                         {
                                     /* Still inside the window: append to the rebuilt send list. */
2630                                 if (sk->send_head == NULL) 
2631                                 {
2632                                         sk->send_head = skb;
2633                                         sk->send_tail = skb;
2634                                 }
2635                                 else
2636                                 {
2637                                         sk->send_tail->link3 = skb;
2638                                         sk->send_tail = skb;
2639                                 }
2640                                 skb->link3 = NULL;
2641                         }
2642                 }
2643                 sti();
2644         }
2645 
             /*
              * Keep head and tail consistent: if either is NULL the list is
              * treated as empty and nothing is counted as outstanding.
              */
2646         if (sk->send_tail == NULL || sk->send_head == NULL) 
2647         {
2648                 sk->send_head = NULL;
2649                 sk->send_tail = NULL;
2650                 sk->packets_out= 0;
2651         }
2652 
             /* Right edge of the peer's window; set here regardless of the shrink test above. */
2653         sk->window_seq = ack + ntohs(th->window);
2654 
2655         /* We don't want too many packets out there. */
2656         if (sk->timeout == TIME_WRITE && 
2657                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2658         {
2659 /* 
2660  * This is Jacobson's slow start and congestion avoidance. 
2661  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2662  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2663  * counter and increment it once every cwnd times.  It's possible
2664  * that this should be done only if sk->retransmits == 0.  I'm
2665  * interpreting "new data is acked" as including data that has
2666  * been retransmitted but is just now being acked.
2667  */
2668                 if (sk->cong_window < sk->ssthresh)  
2669                   /* 
2670                    *    In "safe" area, increase
2671                    */
2672                         sk->cong_window++;
2673                 else 
2674                 {
2675                   /*
2676                    *    In dangerous area, increase slowly.  In theory this is
2677                    *    sk->cong_window += 1 / sk->cong_window
2678                    */
2679                         if (sk->cong_count >= sk->cong_window) 
2680                         {
2681                                 sk->cong_window++;
2682                                 sk->cong_count = 0;
2683                         }
2684                         else 
2685                                 sk->cong_count++;
2686                 }
2687         }
2688 
2689         sk->rcv_ack_seq = ack;
2690 
2691         /*
2692          * if this ack opens up a zero window, clear backoff.  It was
2693          * being used to time the probes, and is probably far higher than
2694          * it needs to be for normal retransmission.
2695          */
2696 
2697         if (sk->timeout == TIME_PROBE0) 
2698         {
2699                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2700                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2701                 {
2702                         sk->retransmits = 0;
2703                         sk->backoff = 0;
2704                   /*
2705                    *    Recompute rto from rtt.  this eliminates any backoff.
2706                    */
2707 
                             /* Same formula and clamps (20 .. 120*HZ) as the rtt update further down. */
2708                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2709                         if (sk->rto > 120*HZ)
2710                                 sk->rto = 120*HZ;
2711                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2712                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2713                                                    .2 of a second is going to need huge windows (SIGH) */
2714                                 sk->rto = 20;
2715                 }
2716         }
2717 
2718   /* 
2719    *    See if we can take anything off of the retransmit queue.
2720    */
2721    
2722         while(sk->send_head != NULL) 
2723         {
2724                 /* Check for a bug. */
2725                 if (sk->send_head->link3 &&
2726                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2727                 {
2728                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2729                         sort_send(sk);
2730                 }
2731 
                     /* The head buffer ends at or before the ack: it is fully acknowledged. */
2732                 if (before(sk->send_head->h.seq, ack+1)) 
2733                 {
2734                         struct sk_buff *oskb;   
2735                         if (sk->retransmits) 
2736                         {       
2737                                 /*
2738                                  *      We were retransmitting.  don't count this in RTT est 
2739                                  */
2740                                 flag |= 2;
2741 
2742                                 /*
2743                                  * even though we've gotten an ack, we're still
2744                                  * retransmitting as long as we're sending from
2745                                  * the retransmit queue.  Keeping retransmits non-zero
2746                                  * prevents us from getting new data interspersed with
2747                                  * retransmissions.
2748                                  */
2749 
2750                                 if (sk->send_head->link3)
2751                                         sk->retransmits = 1;
2752                                 else
2753                                         sk->retransmits = 0;
2754                         }
2755                         /*
2756                          * Note that we only reset backoff and rto in the
2757                          * rtt recomputation code.  And that doesn't happen
2758                          * if there were retransmissions in effect.  So the
2759                          * first new packet after the retransmissions is
2760                          * sent with the backoff still in effect.  Not until
2761                          * we get an ack from a non-retransmitted packet do
2762                          * we reset the backoff and rto.  This allows us to deal
2763                          * with a situation where the network delay has increased
2764                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2765                          */
2766 
2767                         /*
2768                          *      We have one less packet out there. 
2769                          */
2770                          
2771                         if (sk->packets_out > 0) 
2772                                 sk->packets_out --;
2773                         /* 
2774                          *      Wake up the process, it can probably write more. 
2775                          */
2776                         if (!sk->dead) 
2777                                 sk->write_space(sk);
2778                         oskb = sk->send_head;
2779 
                             /*
                              * flag&2 is set above when retransmitting, and below once
                              * any buffer has been freed, so the rtt sample is taken at
                              * most once per call and never during retransmission.
                              */
2780                         if (!(flag&2)) 
2781                         {
2782                                 long m;
2783         
2784                                 /*
2785                                  *      The following amusing code comes from Jacobson's
2786                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2787                                  *      are scaled versions of rtt and mean deviation.
2788                                  *      This is designed to be as fast as possible 
2789                                  *      m stands for "measurement".
2790                                  */
2791         
2792                                 m = jiffies - oskb->when;  /* RTT */
2793                                 if(m<=0)
2794                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2795                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2796                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2797                                 if (m < 0)
2798                                         m = -m;         /* m is now abs(error) */
2799                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2800                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2801         
2802                                 /*
2803                                  *      Now update timeout.  Note that this removes any backoff.
2804                                  */
2805                          
2806                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2807                                 if (sk->rto > 120*HZ)
2808                                         sk->rto = 120*HZ;
2809                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2810                                         sk->rto = 20;
2811                                 sk->backoff = 0;
2812                         }
2813                         flag |= (2|4);
2814                         cli();
                             /* Read the head again now that cli() has been called, then unlink it. */
2815                         oskb = sk->send_head;
2816                         IS_SKB(oskb);
2817                         sk->send_head = oskb->link3;
2818                         if (sk->send_head == NULL) 
2819                         {
2820                                 sk->send_tail = NULL;
2821                         }
2822 
2823                 /*
2824                  *      We may need to remove this from the dev send list. 
2825                  */
2826 
2827                         if (oskb->next)
2828                                 skb_unlink(oskb);
2829                         sti();
2830                         kfree_skb(oskb, FREE_WRITE); /* write. */
2831                         if (!sk->dead) 
2832                                 sk->write_space(sk);
2833                 }
2834                 else
2835                 {
2836                         break;
2837                 }
2838         }
2839 
2840         /*
2841          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2842          * returns non-NULL, we complete ignore the timer stuff in the else
2843          * clause.  We ought to organize the code so that else clause can
2844          * (should) be executed regardless, possibly moving the PROBE timer
2845          * reset over.  The skb_peek() thing should only move stuff to the
2846          * write queue, NOT also manage the timer functions.
2847          */
2848 
2849         /*
2850          * Maybe we can take some stuff off of the write queue,
2851          * and put it onto the xmit queue.
2852          */
2853         if (skb_peek(&sk->write_queue) != NULL) 
2854         {
                     /* Same send conditions as the loop test in tcp_write_xmit(). */
2855                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2856                         (sk->retransmits == 0 || 
2857                          sk->timeout != TIME_WRITE ||
2858                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2859                         && sk->packets_out < sk->cong_window) 
2860                 {
2861                         flag |= 1;
2862                         tcp_write_xmit(sk);
2863                 }
                     /*
                      * Head of the write queue lies beyond the window and nothing
                      * is awaiting an ack: arm the TIME_PROBE0 timer.
                      */
2864                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2865                         sk->send_head == NULL &&
2866                         sk->ack_backlog == 0 &&
2867                         sk->state != TCP_TIME_WAIT) 
2868                 {
2869                         reset_timer(sk, TIME_PROBE0, sk->rto);
2870                 }               
2871         }
2872         else
2873         {
2874                 /*
2875                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2876                  * from TCP_CLOSE we don't do anything
2877                  *
2878                  * from anything else, if there is write data (or fin) pending,
2879                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2880                  * a KEEPALIVE timeout, else we delete the timer.
2881                  *
2882                  * We do not set flag for nominal write data, otherwise we may
2883                  * force a state where we start to write itsy bitsy tidbits
2884                  * of data.
2885                  */
2886 
2887                 switch(sk->state) {
2888                 case TCP_TIME_WAIT:
2889                         /*
2890                          * keep us in TIME_WAIT until we stop getting packets,
2891                          * reset the timeout.
2892                          */
2893                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2894                         break;
2895                 case TCP_CLOSE:
2896                         /*
2897                          * don't touch the timer.
2898                          */
2899                         break;
2900                 default:
2901                         /*
2902                          * must check send_head, write_queue, and ack_backlog
2903                          * to determine which timeout to use.
2904                          */
2905                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2906                                 reset_timer(sk, TIME_WRITE, sk->rto);
2907                         } else if (sk->keepopen) {
2908                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2909                         } else {
2910                                 delete_timer(sk);
2911                         }
2912                         break;
2913                 }
                     /* Earlier timer handling; compiled only if NOTDEF is defined. */
2914 #ifdef NOTDEF
2915                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2916                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2917                 {
2918                         if (!sk->dead)
2919                                 sk->write_space(sk);
2920                         if (sk->keepopen) {
2921                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2922                         } else {
2923                                 delete_timer(sk);
2924                         }
2925                 }
2926                 else
2927                 {
2928                         if (sk->state != (unsigned char) sk->keepopen) 
2929                         {
2930                                 reset_timer(sk, TIME_WRITE, sk->rto);
2931                         }
2932                         if (sk->state == TCP_TIME_WAIT) 
2933                         {
2934                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2935                         }       
2936                 }
2937 #endif
2938         }
2939 
             /* Nothing is outstanding or queued: a held-back partial packet can go now. */
2940         if (sk->packets_out == 0 && sk->partial != NULL &&
2941                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2942         {
2943                 flag |= 1;
2944                 tcp_send_partial(sk);
2945         }
2946 
2947         /*
2948          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2949          * we are now waiting for an acknowledge to our FIN.  The other end is
2950          * already in TIME_WAIT.
2951          *
2952          * Move to TCP_CLOSE on success.
2953          */
2954 
2955         if (sk->state == TCP_LAST_ACK) 
2956         {
2957                 if (!sk->dead)
2958                         sk->state_change(sk);
2959                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2960                 {
2961                         flag |= 1;
2962                         tcp_set_state(sk,TCP_CLOSE);
2963                         sk->shutdown = SHUTDOWN_MASK;
2964                 }
2965         }
2966 
2967         /*
2968          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2969          *
2970          * Move to FIN_WAIT2 to await a FIN from the other end. Set
2971          * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
2972          */
2973 
2974         if (sk->state == TCP_FIN_WAIT1) 
2975         {
2976 
2977                 if (!sk->dead) 
2978                         sk->state_change(sk);
2979                 if (sk->rcv_ack_seq == sk->write_seq) 
2980                 {
2981                         flag |= 1;
2982 #ifdef THIS_BIT_IS_WRONG                        
2983                         if (sk->acked_seq != sk->fin_seq) 
2984                         {
2985                                 tcp_time_wait(sk);
2986                         }
2987                         else
2988 #endif                  
2989                         {
2990                                 sk->shutdown |= SEND_SHUTDOWN;
2991                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2992                         }
2993                 }
2994         }
2995 
2996         /*
2997          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2998          *
2999          *      Move to TIME_WAIT
3000          */
3001 
3002         if (sk->state == TCP_CLOSING) 
3003         {
3004 
3005                 if (!sk->dead) 
3006                         sk->state_change(sk);
3007                 if (sk->rcv_ack_seq == sk->write_seq) 
3008                 {
3009                         flag |= 1;
3010                         tcp_time_wait(sk);
3011                 }
3012         }
3013 
3014         /*
3015          * I make no guarantees about the first clause in the following
3016          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
3017          * what conditions "!flag" would be true.  However I think the rest
3018          * of the conditions would prevent that from causing any
3019          * unnecessary retransmission. 
3020          *   Clearly if the first packet has expired it should be 
3021          * retransmitted.  The other alternative, "flag&2 && retransmits", is
3022          * harder to explain:  You have to look carefully at how and when the
3023          * timer is set and with what timeout.  The most recent transmission always
3024          * sets the timer.  So in general if the most recent thing has timed
3025          * out, everything before it has as well.  So we want to go ahead and
3026          * retransmit some more.  If we didn't explicitly test for this
3027          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
3028          * would not be true.  If you look at the pattern of timing, you can
3029          * show that rto is increased fast enough that the next packet would
3030          * almost never be retransmitted immediately.  Then you'd end up
3031          * waiting for a timeout to send each packet on the retransmission
3032          * queue.  With my implementation of the Karn sampling algorithm,
3033          * the timeout would double each time.  The net result is that it would
3034          * take a hideous amount of time to recover from a single dropped packet.
3035          * It's possible that there should also be a test for TIME_WRITE, but
3036          * I think as long as "send_head != NULL" and "retransmit" is on, we've
3037          * got to be in real retransmission mode.
3038          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
3039          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
3040          * As long as no further losses occur, this seems reasonable.
3041          */
3042         
             /*
              * NOTE(review): "when + rto < jiffies" is a plain comparison, unlike
              * the before()/after() helpers used for sequence numbers above -
              * confirm how this behaves if the jiffies counter wraps.
              */
3043         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3044                (((flag&2) && sk->retransmits) ||
3045                (sk->send_head->when + sk->rto < jiffies))) 
3046         {
3047                 ip_do_retransmit(sk, 1);
3048                 reset_timer(sk, TIME_WRITE, sk->rto);
3049         }
3050 
3051         return(1);
3052 }
3053 
3054 
3055 /*
3056  *      This routine handles the data.  If there is room in the buffer,
3057  *      it will be have already been moved into it.  If there is no
3058  *      room, then we will just have to discard the packet.
3059  */
3060 
3061 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
3062          unsigned long saddr, unsigned short len)
3063 {
3064         struct sk_buff *skb1, *skb2;
3065         struct tcphdr *th;
3066         int dup_dumped=0;
3067         unsigned long new_seq;
3068 
3069         th = skb->h.th;
3070         skb->len = len -(th->doff*4);
3071 
3072         /* The bytes in the receive read/assembly queue has increased. Needed for the
3073            low memory discard algorithm */
3074            
3075         sk->bytes_rcv += skb->len;
3076         
3077         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3078         {
3079                 /* 
3080                  *      Don't want to keep passing ack's back and forth. 
3081                  *      (someone sent us dataless, boring frame)
3082                  */
3083                 if (!th->ack)
3084                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3085                 kfree_skb(skb, FREE_READ);
3086                 return(0);
3087         }
3088         
3089         /*
3090          *      We no longer have anyone receiving data on this connection.
3091          */
3092 
3093         if(sk->shutdown & RCV_SHUTDOWN)
3094         {
3095                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3096                 
3097                 if(after(new_seq,sk->/*copied*/acked_seq+1))    /* If the right edge of this frame is after the last copied byte
3098                                                            then it contains data we will never touch. We send an RST to 
3099                                                            ensure the far end knows it never got to the application */
3100                 {
3101                         sk->acked_seq = new_seq + th->fin;
3102                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3103                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3104                         tcp_statistics.TcpEstabResets++;
3105                         tcp_set_state(sk,TCP_CLOSE);
3106                         sk->err = EPIPE;
3107                         sk->shutdown = SHUTDOWN_MASK;
3108                         kfree_skb(skb, FREE_READ);
3109                         if (!sk->dead)
3110                                 sk->state_change(sk);
3111                         return(0);
3112                 }
3113 #if 0           
3114                 /* Discard the frame here - we've already proved its a duplicate */
3115                 
3116                 kfree_skb(skb, FREE_READ);
3117                 return(0);                              
3118 #endif          
3119         }
3120         /*
3121          *      Now we have to walk the chain, and figure out where this one
3122          *      goes into it.  This is set up so that the last packet we received
3123          *      will be the first one we look at, that way if everything comes
3124          *      in order, there will be no performance loss, and if they come
3125          *      out of order we will be able to fit things in nicely.
3126          */
3127 
3128         /* 
3129          *      This should start at the last one, and then go around forwards.
3130          */
3131 
3132         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3133         {
3134                 skb_queue_head(&sk->receive_queue,skb);
3135                 skb1= NULL;
3136         } 
3137         else
3138         {
3139                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3140                 {
3141                         if(sk->debug)
3142                         {
3143                                 printk("skb1=%p :", skb1);
3144                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3145                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3146                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3147                                                 sk->acked_seq);
3148                         }
3149                         
3150                         /*
3151                          *      Optimisation: Duplicate frame or extension of previous frame from
3152                          *      same sequence point (lost ack case).
3153                          *      The frame contains duplicate data or replaces a previous frame
3154                          *      discard the previous frame (safe as sk->inuse is set) and put
3155                          *      the new one in its place.
3156                          */
3157                          
3158                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3159                         {
3160                                 skb_append(skb1,skb);
3161                                 skb_unlink(skb1);
3162                                 kfree_skb(skb1,FREE_READ);
3163                                 dup_dumped=1;
3164                                 skb1=NULL;
3165                                 break;
3166                         }
3167                         
3168                         /*
3169                          *      Found where it fits
3170                          */
3171                          
3172                         if (after(th->seq+1, skb1->h.th->seq))
3173                         {
3174                                 skb_append(skb1,skb);
3175                                 break;
3176                         }
3177                         
3178                         /*
3179                          *      See if we've hit the start. If so insert.
3180                          */
3181                         if (skb1 == skb_peek(&sk->receive_queue))
3182                         {
3183                                 skb_queue_head(&sk->receive_queue, skb);
3184                                 break;
3185                         }
3186                 }
3187         }
3188 
3189         /*
3190          *      Figure out what the ack value for this frame is
3191          */
3192          
3193         th->ack_seq = th->seq + skb->len;
3194         if (th->syn) 
3195                 th->ack_seq++;
3196         if (th->fin)
3197                 th->ack_seq++;
3198 
3199         if (before(sk->acked_seq, sk->copied_seq)) 
3200         {
3201                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3202                 sk->acked_seq = sk->copied_seq;
3203         }
3204 
3205         /*
3206          *      Now figure out if we can ack anything.
3207          */
3208 
3209         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3210         {
3211                 if (before(th->seq, sk->acked_seq+1)) 
3212                 {
3213                         int newwindow;
3214 
3215                         if (after(th->ack_seq, sk->acked_seq)) 
3216                         {
3217                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3218                                 if (newwindow < 0)
3219                                         newwindow = 0;  
3220                                 sk->window = newwindow;
3221                                 sk->acked_seq = th->ack_seq;
3222                         }
3223                         skb->acked = 1;
3224 
3225                         /* 
3226                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3227                          */
3228 
3229                         if (skb->h.th->fin) 
3230                         {
3231                                 if (!sk->dead) 
3232                                         sk->state_change(sk);
3233                                 sk->shutdown |= RCV_SHUTDOWN;
3234                         }
3235           
3236                         for(skb2 = skb->next;
3237                             skb2 != (struct sk_buff *)&sk->receive_queue;
3238                             skb2 = skb2->next) 
3239                         {
3240                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3241                                 {
3242                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3243                                         {
3244                                                 newwindow = sk->window -
3245                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3246                                                 if (newwindow < 0)
3247                                                         newwindow = 0;  
3248                                                 sk->window = newwindow;
3249                                                 sk->acked_seq = skb2->h.th->ack_seq;
3250                                         }
3251                                         skb2->acked = 1;
3252                                         /*
3253                                          *      When we ack the fin, we turn on
3254                                          *      the RCV_SHUTDOWN flag.
3255                                          */
3256                                         if (skb2->h.th->fin) 
3257                                         {
3258                                                 sk->shutdown |= RCV_SHUTDOWN;
3259                                                 if (!sk->dead)
3260                                                         sk->state_change(sk);
3261                                         }
3262 
3263                                         /*
3264                                          *      Force an immediate ack.
3265                                          */
3266                                          
3267                                         sk->ack_backlog = sk->max_ack_backlog;
3268                                 }
3269                                 else
3270                                 {
3271                                         break;
3272                                 }
3273                         }
3274 
3275                         /*
3276                          *      This also takes care of updating the window.
3277                          *      This if statement needs to be simplified.
3278                          */
3279                         if (!sk->delay_acks ||
3280                             sk->ack_backlog >= sk->max_ack_backlog || 
3281                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3282         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3283                         }
3284                         else 
3285                         {
3286                                 sk->ack_backlog++;
3287                                 if(sk->debug)
3288                                         printk("Ack queued.\n");
3289                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3290                         }
3291                 }
3292         }
3293 
3294         /*
3295          *      If we've missed a packet, send an ack.
3296          *      Also start a timer to send another.
3297          */
3298          
3299         if (!skb->acked) 
3300         {
3301         
3302         /*
3303          *      This is important.  If we don't have much room left,
3304          *      we need to throw out a few packets so we have a good
3305          *      window.  Note that mtu is used, not mss, because mss is really
3306          *      for the send side.  He could be sending us stuff as large as mtu.
3307          */
3308                  
3309                 while (sk->prot->rspace(sk) < sk->mtu) 
3310                 {
3311                         skb1 = skb_peek(&sk->receive_queue);
3312                         if (skb1 == NULL) 
3313                         {
3314                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3315                                 break;
3316                         }
3317 
3318                         /*
3319                          *      Don't throw out something that has been acked. 
3320                          */
3321                  
3322                         if (skb1->acked) 
3323                         {
3324                                 break;
3325                         }
3326                 
3327                         skb_unlink(skb1);
3328                         kfree_skb(skb1, FREE_READ);
3329                 }
3330                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3331                 sk->ack_backlog++;
3332                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3333         }
3334         else
3335         {
3336                 /* We missed a packet.  Send an ack to try to resync things. */
3337                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3338         }
3339 
3340         /*
3341          *      Now tell the user we may have some data. 
3342          */
3343          
3344         if (!sk->dead) 
3345         {
3346                 if(sk->debug)
3347                         printk("Data wakeup.\n");
3348                 sk->data_ready(sk,0);
3349         } 
3350         return(0);
3351 }
3352 
3353 
3354 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3355 {
3356         unsigned long ptr = ntohs(th->urg_ptr);
3357 
3358         if (ptr)
3359                 ptr--;
3360         ptr += th->seq;
3361 
3362         /* ignore urgent data that we've already seen and read */
3363         if (after(sk->copied_seq+1, ptr))
3364                 return;
3365 
3366         /* do we already have a newer (or duplicate) urgent pointer? */
3367         if (sk->urg_data && !after(ptr, sk->urg_seq))
3368                 return;
3369 
3370         /* tell the world about our new urgent pointer */
3371         if (sk->proc != 0) {
3372                 if (sk->proc > 0) {
3373                         kill_proc(sk->proc, SIGURG, 1);
3374                 } else {
3375                         kill_pg(-sk->proc, SIGURG, 1);
3376                 }
3377         }
3378         sk->urg_data = URG_NOTYET;
3379         sk->urg_seq = ptr;
3380 }
3381 
3382 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3383         unsigned long saddr, unsigned long len)
3384 {
3385         unsigned long ptr;
3386 
3387         /* check if we get a new urgent pointer */
3388         if (th->urg)
3389                 tcp_check_urg(sk,th);
3390 
3391         /* do we wait for any urgent data? */
3392         if (sk->urg_data != URG_NOTYET)
3393                 return 0;
3394 
3395         /* is the urgent pointer pointing into this packet? */
3396         ptr = sk->urg_seq - th->seq + th->doff*4;
3397         if (ptr >= len)
3398                 return 0;
3399 
3400         /* ok, got the correct packet, update info */
3401         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3402         if (!sk->dead)
3403                 sk->data_ready(sk,0);
3404         return 0;
3405 }
3406 
3407 
3408 /*
3409  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3410  *
3411  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3412  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3413  *  TIME-WAIT)
3414  *
3415  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3416  *  close and we go into CLOSING (and later onto TIME-WAIT)
3417  *
3418  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3419  *
3420  */
3421 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3422          unsigned long saddr, struct device *dev)
3423 {
3424         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3425 
3426         if (!sk->dead) 
3427         {
3428                 sk->state_change(sk);
3429         }
3430 
3431         switch(sk->state) 
3432         {
3433                 case TCP_SYN_RECV:
3434                 case TCP_SYN_SENT:
3435                 case TCP_ESTABLISHED:
3436                         /*
3437                          * move to CLOSE_WAIT, tcp_data() already handled
3438                          * sending the ack.
3439                          */
3440                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3441                         /*sk->fin_seq = th->seq+1;*/
3442                         tcp_set_state(sk,TCP_CLOSE_WAIT);
3443                         if (th->rst)
3444                                 sk->shutdown = SHUTDOWN_MASK;
3445                         break;
3446 
3447                 case TCP_CLOSE_WAIT:
3448                 case TCP_CLOSING:
3449                         /*
3450                          * received a retransmission of the FIN, do
3451                          * nothing.
3452                          */
3453                         break;
3454                 case TCP_TIME_WAIT:
3455                         /*
3456                          * received a retransmission of the FIN,
3457                          * restart the TIME_WAIT timer.
3458                          */
3459                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3460                         return(0);
3461                 case TCP_FIN_WAIT1:
3462                         /*
3463                          * This case occurs when a simultaneous close
3464                          * happens, we must ack the received FIN and
3465                          * enter the CLOSING state.
3466                          *
3467                          * XXX timeout not set properly
3468                          */
3469 
3470                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3471                         /*sk->fin_seq = th->seq+1;*/
3472                         tcp_set_state(sk,TCP_CLOSING);
3473                         break;
3474                 case TCP_FIN_WAIT2:
3475                         /*
3476                          * received a FIN -- send ACK and enter TIME_WAIT
3477                          */
3478                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3479                         /*sk->fin_seq = th->seq+1;*/
3480                         sk->shutdown|=SHUTDOWN_MASK;
3481                         tcp_set_state(sk,TCP_TIME_WAIT);
3482                         break;
3483                 case TCP_CLOSE:
3484                         /*
3485                          * already in CLOSE
3486                          */
3487                         break;
3488                 default:
3489                         tcp_set_state(sk,TCP_LAST_ACK);
3490         
3491                         /* Start the timers. */
3492                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3493                         return(0);
3494         }
3495         sk->ack_backlog++;
3496 
3497         return(0);
3498 }
3499 
3500 
3501 /* This will accept the next outstanding connection. */
3502 static struct sock *
3503 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3504 {
3505         struct sock *newsk;
3506         struct sk_buff *skb;
3507   
3508   /*
3509    * We need to make sure that this socket is listening,
3510    * and that it has something pending.
3511    */
3512 
3513         if (sk->state != TCP_LISTEN) 
3514         {
3515                 sk->err = EINVAL;
3516                 return(NULL); 
3517         }
3518 
3519         /* Avoid the race. */
3520         cli();
3521         sk->inuse = 1;
3522 
3523         while((skb = tcp_dequeue_established(sk)) == NULL) 
3524         {
3525                 if (flags & O_NONBLOCK) 
3526                 {
3527                         sti();
3528                         release_sock(sk);
3529                         sk->err = EAGAIN;
3530                         return(NULL);
3531                 }
3532 
3533                 release_sock(sk);
3534                 interruptible_sleep_on(sk->sleep);
3535                 if (current->signal & ~current->blocked) 
3536                 {
3537                         sti();
3538                         sk->err = ERESTARTSYS;
3539                         return(NULL);
3540                 }
3541                 sk->inuse = 1;
3542         }
3543         sti();
3544 
3545         /*
3546          *      Now all we need to do is return skb->sk. 
3547          */
3548 
3549         newsk = skb->sk;
3550 
3551         kfree_skb(skb, FREE_READ);
3552         sk->ack_backlog--;
3553         release_sock(sk);
3554         return(newsk);
3555 }
3556 
3557 
3558 /*
3559  *      This will initiate an outgoing connection. 
3560  */
3561  
3562 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3563 {
3564         struct sk_buff *buff;
3565         struct device *dev=NULL;
3566         unsigned char *ptr;
3567         int tmp;
3568         struct tcphdr *t1;
3569         struct rtable *rt;
3570 
3571         if (sk->state != TCP_CLOSE) 
3572                 return(-EISCONN);
3573 
3574         if (addr_len < 8) 
3575                 return(-EINVAL);
3576 
3577         if (usin->sin_family && usin->sin_family != AF_INET) 
3578                 return(-EAFNOSUPPORT);
3579 
3580         /*
3581          *      connect() to INADDR_ANY means loopback (BSD'ism).
3582          */
3583         
3584         if(usin->sin_addr.s_addr==INADDR_ANY)
3585                 usin->sin_addr.s_addr=ip_my_addr();
3586                   
3587         /*
3588          *      Don't want a TCP connection going to a broadcast address 
3589          */
3590 
3591         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3592         { 
3593                 return -ENETUNREACH;
3594         }
3595   
3596         /*
3597          *      Connect back to the same socket: Blows up so disallow it 
3598          */
3599 
3600         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3601                 return -EBUSY;
3602 
3603         sk->inuse = 1;
3604         sk->daddr = usin->sin_addr.s_addr;
3605         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3606         sk->window_seq = sk->write_seq;
3607         sk->rcv_ack_seq = sk->write_seq -1;
3608         sk->err = 0;
3609         sk->dummy_th.dest = usin->sin_port;
3610         release_sock(sk);
3611 
3612         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3613         if (buff == NULL) 
3614         {
3615                 return(-ENOMEM);
3616         }
3617         sk->inuse = 1;
3618         buff->len = 24;
3619         buff->sk = sk;
3620         buff->free = 1;
3621         buff->localroute = sk->localroute;
3622         
3623         t1 = (struct tcphdr *) buff->data;
3624 
3625         /*
3626          *      Put in the IP header and routing stuff. 
3627          */
3628          
3629         rt=ip_rt_route(sk->daddr, NULL, NULL);
3630         
3631 
3632         /*
3633          *      We need to build the routing stuff from the things saved in skb. 
3634          */
3635 
3636         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3637                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3638         if (tmp < 0) 
3639         {
3640                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3641                 release_sock(sk);
3642                 return(-ENETUNREACH);
3643         }
3644 
3645         buff->len += tmp;
3646         t1 = (struct tcphdr *)((char *)t1 +tmp);
3647 
3648         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3649         t1->seq = ntohl(sk->write_seq++);
3650         sk->sent_seq = sk->write_seq;
3651         buff->h.seq = sk->write_seq;
3652         t1->ack = 0;
3653         t1->window = 2;
3654         t1->res1=0;
3655         t1->res2=0;
3656         t1->rst = 0;
3657         t1->urg = 0;
3658         t1->psh = 0;
3659         t1->syn = 1;
3660         t1->urg_ptr = 0;
3661         t1->doff = 6;
3662         /* use 512 or whatever user asked for */
3663         
3664         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3665                 sk->window_clamp=rt->rt_window;
3666         else
3667                 sk->window_clamp=0;
3668 
3669         if (sk->user_mss)
3670                 sk->mtu = sk->user_mss;
3671         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3672                 sk->mtu = rt->rt_mss;
3673         else 
3674         {
3675 #ifdef CONFIG_INET_SNARL
3676                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3677 #else
3678                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3679 #endif
3680                         sk->mtu = 576 - HEADER_SIZE;
3681                 else
3682                         sk->mtu = MAX_WINDOW;
3683         }
3684         /*
3685          *      but not bigger than device MTU 
3686          */
3687 
3688         if(sk->mtu <32)
3689                 sk->mtu = 32;   /* Sanity limit */
3690                 
3691         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3692         
3693         /*
3694          *      Put in the TCP options to say MTU. 
3695          */
3696 
3697         ptr = (unsigned char *)(t1+1);
3698         ptr[0] = 2;
3699         ptr[1] = 4;
3700         ptr[2] = (sk->mtu) >> 8;
3701         ptr[3] = (sk->mtu) & 0xff;
3702         tcp_send_check(t1, sk->saddr, sk->daddr,
3703                   sizeof(struct tcphdr) + 4, sk);
3704 
3705         /*
3706          *      This must go first otherwise a really quick response will get reset. 
3707          */
3708 
3709         tcp_set_state(sk,TCP_SYN_SENT);
3710 /*      sk->rtt = TCP_CONNECT_TIME;*/
3711         sk->rto = TCP_TIMEOUT_INIT;
3712         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3713         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3714 
3715         sk->prot->queue_xmit(sk, dev, buff, 0);  
3716         tcp_statistics.TcpActiveOpens++;
3717         tcp_statistics.TcpOutSegs++;
3718   
3719         release_sock(sk);
3720         return(0);
3721 }
3722 
3723 
3724 /* This functions checks to see if the tcp header is actually acceptable. */
3725 static int
3726 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3727              struct options *opt, unsigned long saddr, struct device *dev)
3728 {
3729         unsigned long next_seq;
3730 
3731         next_seq = len - 4*th->doff;
3732         if (th->fin)
3733                 next_seq++;
3734         /* if we have a zero window, we can't have any data in the packet.. */
3735         if (next_seq && !sk->window)
3736                 goto ignore_it;
3737         next_seq += th->seq;
3738 
3739         /*
3740          * This isn't quite right.  sk->acked_seq could be more recent
3741          * than sk->window.  This is however close enough.  We will accept
3742          * slightly more packets than we should, but it should not cause
3743          * problems unless someone is trying to forge packets.
3744          */
3745 
3746         /* have we already seen all of this packet? */
3747         if (!after(next_seq+1, sk->acked_seq))
3748                 goto ignore_it;
3749         /* or does it start beyond the window? */
3750         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3751                 goto ignore_it;
3752 
3753         /* ok, at least part of this packet would seem interesting.. */
3754         return 1;
3755 
3756 ignore_it:
3757         if (th->rst)
3758                 return 0;
3759 
3760         /*
3761          *      Send a reset if we get something not ours and we are
3762          *      unsynchronized. Note: We don't do anything to our end. We
3763          *      are just killing the bogus remote connection then we will
3764          *      connect again and it will work (with luck).
3765          */
3766          
3767         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3768                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3769                 return 1;
3770         }
3771 
3772         /* Try to resync things. */
3773         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3774         return 0;
3775 }
3776 
3777 
3778 #ifdef TCP_FASTPATH
3779 /*
3780  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3781  *      Yes if
3782  *      a) The queue is empty
3783  *      b) The last frame on the queue has the acked flag set
3784  */
3785 
3786 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3787 {
3788         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3789         if(skb==NULL || sk->receive_queue.prev->acked)
3790                 return 1;
3791 }
3792 
3793 #endif
3794 
3795 int
3796 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3797         unsigned long daddr, unsigned short len,
3798         unsigned long saddr, int redo, struct inet_protocol * protocol)
3799 {
3800         struct tcphdr *th;
3801         struct sock *sk;
3802 
3803         if (!skb) 
3804         {
3805                 return(0);
3806         }
3807 
3808         if (!dev) 
3809         {
3810                 return(0);
3811         }
3812   
3813         tcp_statistics.TcpInSegs++;
3814   
3815         if(skb->pkt_type!=PACKET_HOST)
3816         {
3817                 kfree_skb(skb,FREE_READ);
3818                 return(0);
3819         }
3820   
3821         th = skb->h.th;
3822 
3823         /*
3824          *      Find the socket.
3825          */
3826 
3827         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3828 
3829         /*
3830          *      If this socket has got a reset its to all intents and purposes 
3831          *      really dead 
3832          */
3833          
3834         if (sk!=NULL && sk->zapped)
3835                 sk=NULL;
3836 
3837         if (!redo) 
3838         {
3839                 if (tcp_check(th, len, saddr, daddr )) 
3840                 {
3841                         skb->sk = NULL;
3842                         kfree_skb(skb,FREE_READ);
3843                         /*
3844                          * We don't release the socket because it was
3845                          * never marked in use.
3846                          */
3847                         return(0);
3848                 }
3849                 th->seq = ntohl(th->seq);
3850 
3851                 /* See if we know about the socket. */
3852                 if (sk == NULL) 
3853                 {
3854                         if (!th->rst)
3855                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3856                         skb->sk = NULL;
3857                         kfree_skb(skb, FREE_READ);
3858                         return(0);
3859                 }
3860 
3861                 skb->len = len;
3862                 skb->sk = sk;
3863                 skb->acked = 0;
3864                 skb->used = 0;
3865                 skb->free = 0;
3866                 skb->saddr = daddr;
3867                 skb->daddr = saddr;
3868         
3869                 /* We may need to add it to the backlog here. */
3870                 cli();
3871                 if (sk->inuse) 
3872                 {
3873                         skb_queue_head(&sk->back_log, skb);
3874                         sti();
3875                         return(0);
3876                 }
3877                 sk->inuse = 1;
3878                 sti();
3879         }
3880         else
3881         {
3882                 if (!sk) 
3883                 {
3884                         return(0);
3885                 }
3886         }
3887 
3888 
3889         if (!sk->prot) 
3890         {
3891                 return(0);
3892         }
3893 
3894 
3895         /*
3896          *      Charge the memory to the socket. 
3897          */
3898          
3899         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3900         {
3901                 skb->sk = NULL;
3902                 kfree_skb(skb, FREE_READ);
3903                 release_sock(sk);
3904                 return(0);
3905         }
3906 
3907         sk->rmem_alloc += skb->mem_len;
3908 
3909 #ifdef TCP_FASTPATH
3910 /*
3911  *      Incoming data stream fastpath. 
3912  *
3913  *      We try to optimise two things.
3914  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3915  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3916  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3917  *
3918  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3919  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3920  *      speed although further optimizing here is possible.
3921  */
3922  
3923         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3924         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3925         {       
3926                 /* Packets in order. Fits window */
3927                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3928                 {
3929                         /* Ack is harder */
3930                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3931                         {
3932                                 kfree_skb(skb, FREE_READ);
3933                                 release_sock(sk);
3934                                 return 0;
3935                         }
3936                         /*
3937                          *      Set up variables
3938                          */
3939                         skb->len -= (th->doff *4);
3940                         sk->bytes_rcv += skb->len;
3941                         tcp_rx_hit2++;
3942                         if(skb->len)
3943                         {
3944                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3945                                 if(sk->window >= skb->len)
3946                                         sk->window-=skb->len;                   /* We know its effect on the window */
3947                                 else
3948                                         sk->window=0;
3949                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3950                                 skb->acked=1;                           /* Guaranteed true */
3951                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3952                                         sk->bytes_rcv > sk->max_unacked)
3953                                 {
3954                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3955                                 }
3956                                 else
3957                                 {
3958                                         sk->ack_backlog++;
3959                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3960                                 }
3961                                 if(!sk->dead)
3962                                         sk->data_ready(sk,0);
3963                                 release_sock(sk);
3964                                 return 0;
3965                         }
3966                 }
3967                 /*
3968                  *      More generic case of arriving data stream in ESTABLISHED
3969                  */
3970                 tcp_rx_hit1++;
3971                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3972                 {
3973                         kfree_skb(skb, FREE_READ);
3974                         release_sock(sk);
3975                         return 0;
3976                 }
3977                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3978                 {
3979                         kfree_skb(skb, FREE_READ);
3980                         release_sock(sk);
3981                         return 0;
3982                 }
3983                 if(tcp_data(skb, sk, saddr, len))
3984                         kfree_skb(skb, FREE_READ);
3985                 release_sock(sk);
3986                 return 0;
3987         }
3988         tcp_rx_miss++;
3989 #endif  
3990 
3991         /*
3992          *      Now deal with all cases.
3993          */
3994          
3995         switch(sk->state) 
3996         {
3997         
3998                 /*
3999                  * This should close the system down if it's waiting
4000                  * for an ack that is never going to be sent.
4001                  */
4002                 case TCP_LAST_ACK:
4003                         if (th->rst) 
4004                         {
4005                                 sk->zapped=1;
4006                                 sk->err = ECONNRESET;
4007                                 tcp_set_state(sk,TCP_CLOSE);
4008                                 sk->shutdown = SHUTDOWN_MASK;
4009                                 if (!sk->dead) 
4010                                 {
4011                                         sk->state_change(sk);
4012                                 }
4013                                 kfree_skb(skb, FREE_READ);
4014                                 release_sock(sk);
4015                                 return(0);
4016                         }
4017 
4018                 case TCP_ESTABLISHED:
4019                 case TCP_CLOSE_WAIT:
4020                 case TCP_CLOSING:
4021                 case TCP_FIN_WAIT1:
4022                 case TCP_FIN_WAIT2:
4023                 case TCP_TIME_WAIT:
4024                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4025                         {
4026                                 kfree_skb(skb, FREE_READ);
4027                                 release_sock(sk);
4028                                 return(0);
4029                         }
4030 
4031                         if (th->rst) 
4032                         {
4033                                 tcp_statistics.TcpEstabResets++;
4034                                 sk->zapped=1;
4035                                 /* This means the thing should really be closed. */
4036                                 sk->err = ECONNRESET;
4037                                 if (sk->state == TCP_CLOSE_WAIT) 
4038                                 {
4039                                         sk->err = EPIPE;
4040                                 }
4041         
4042                                 /*
4043                                  * A reset with a fin just means that
4044                                  * the data was not all read.
4045                                  */
4046                                 tcp_set_state(sk,TCP_CLOSE);
4047                                 sk->shutdown = SHUTDOWN_MASK;
4048                                 if (!sk->dead) 
4049                                 {
4050                                         sk->state_change(sk);
4051                                 }
4052                                 kfree_skb(skb, FREE_READ);
4053                                 release_sock(sk);
4054                                 return(0);
4055                         }
4056                         if (th->syn) 
4057                         {
4058                                 tcp_statistics.TcpEstabResets++;
4059                                 sk->err = ECONNRESET;
4060                                 tcp_set_state(sk,TCP_CLOSE);
4061                                 sk->shutdown = SHUTDOWN_MASK;
4062                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4063                                 if (!sk->dead) {
4064                                         sk->state_change(sk);
4065                                 }
4066                                 kfree_skb(skb, FREE_READ);
4067                                 release_sock(sk);
4068                                 return(0);
4069                         }
4070         
4071                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4072                                 kfree_skb(skb, FREE_READ);
4073                                 release_sock(sk);
4074                                 return(0);
4075                         }
4076         
4077                         if (tcp_urg(sk, th, saddr, len)) {
4078                                 kfree_skb(skb, FREE_READ);
4079                                 release_sock(sk);
4080                                 return(0);
4081                         }
4082 
4083         
4084                         if (tcp_data(skb, sk, saddr, len)) {
4085                                 kfree_skb(skb, FREE_READ);
4086                                 release_sock(sk);
4087                                 return(0);
4088                         }       
4089 
4090                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4091                                 kfree_skb(skb, FREE_READ);
4092                                 release_sock(sk);
4093                                 return(0);
4094                         }
4095         
4096                         release_sock(sk);
4097                         return(0);
4098                 
4099                 case TCP_CLOSE:
4100                         if (sk->dead || sk->daddr) {
4101                                 kfree_skb(skb, FREE_READ);
4102                                         release_sock(sk);
4103                                 return(0);
4104                         }
4105         
4106                         if (!th->rst) {
4107                                 if (!th->ack)
4108                                         th->ack_seq = 0;
4109                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4110                         }
4111                         kfree_skb(skb, FREE_READ);
4112                         release_sock(sk);
4113                                 return(0);
4114         
4115                 case TCP_LISTEN:
4116                         if (th->rst) {
4117                                 kfree_skb(skb, FREE_READ);
4118                                 release_sock(sk);
4119                                 return(0);
4120                         }
4121                         if (th->ack) {
4122                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4123                                 kfree_skb(skb, FREE_READ);
4124                                 release_sock(sk);
4125                                 return(0);
4126                         }
4127         
4128                         if (th->syn) 
4129                         {
4130                                 /*
4131                                  * Now we just put the whole thing including
4132                                  * the header and saddr, and protocol pointer
4133                                  * into the buffer.  We can't respond until the
4134                                  * user tells us to accept the connection.
4135                                  */
4136                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4137                                 release_sock(sk);
4138                                 return(0);
4139                         }
4140 
4141                         kfree_skb(skb, FREE_READ);
4142                         release_sock(sk);
4143                         return(0);
4144 
4145                 case TCP_SYN_RECV:
4146                         if (th->syn) {
4147                                 /* Probably a retransmitted syn */
4148                                 kfree_skb(skb, FREE_READ);
4149                                 release_sock(sk);
4150                                 return(0);
4151                         }
4152         
4153         
4154                 default:
4155                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4156                         {
4157                                 kfree_skb(skb, FREE_READ);
4158                                 release_sock(sk);
4159                                 return(0);
4160                         }
4161         
4162                 case TCP_SYN_SENT:
4163                         if (th->rst) 
4164                         {
4165                                 tcp_statistics.TcpAttemptFails++;
4166                                 sk->err = ECONNREFUSED;
4167                                 tcp_set_state(sk,TCP_CLOSE);
4168                                 sk->shutdown = SHUTDOWN_MASK;
4169                                 sk->zapped = 1;
4170                                 if (!sk->dead) 
4171                                 {
4172                                         sk->state_change(sk);
4173                                 }
4174                                 kfree_skb(skb, FREE_READ);
4175                                 release_sock(sk);
4176                                 return(0);
4177                         }
4178                         if (!th->ack) 
4179                         {
4180                                 if (th->syn) 
4181                                 {
4182                                         tcp_set_state(sk,TCP_SYN_RECV);
4183                                 }
4184                                 kfree_skb(skb, FREE_READ);
4185                                 release_sock(sk);
4186                                 return(0);
4187                         }
4188         
4189                         switch(sk->state) 
4190                         {
4191                                 case TCP_SYN_SENT:
4192                                         if (!tcp_ack(sk, th, saddr, len)) 
4193                                         {
4194                                                 tcp_statistics.TcpAttemptFails++;
4195                                                 tcp_reset(daddr, saddr, th,
4196                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4197                                                 kfree_skb(skb, FREE_READ);
4198                                                         release_sock(sk);
4199                                                 return(0);
4200                                         }
4201         
4202                                         /*
4203                                          * If the syn bit is also set, switch to
4204                                          * tcp_syn_recv, and then to established.
4205                                          */
4206                                         if (!th->syn) 
4207                                         {
4208                                                 kfree_skb(skb, FREE_READ);
4209                                                 release_sock(sk);
4210                                                 return(0);
4211                                         }
4212         
4213                                         /* Ack the syn and fall through. */
4214                                         sk->acked_seq = th->seq+1;
4215                                         sk->fin_seq = th->seq;
4216                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4217                                                 sk, th, sk->daddr);
4218                 
4219                                 case TCP_SYN_RECV:
4220                                         if (!tcp_ack(sk, th, saddr, len)) 
4221                                         {
4222                                                 tcp_statistics.TcpAttemptFails++;
4223                                                 tcp_reset(daddr, saddr, th,
4224                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4225                                                 kfree_skb(skb, FREE_READ);
4226                                                 release_sock(sk);
4227                                                 return(0);
4228                                         }
4229         
4230                                         tcp_set_state(sk,TCP_ESTABLISHED);
4231         
4232                                         /*
4233                                          *      Now we need to finish filling out
4234                                          *      some of the tcp header.
4235                                          * 
4236                                          *      We need to check for mtu info. 
4237                                          */
4238                                         tcp_options(sk, th);
4239                                         sk->dummy_th.dest = th->source;
4240                                         sk->copied_seq = sk->acked_seq-1;
4241                                         if (!sk->dead) 
4242                                         {
4243                                                 sk->state_change(sk);
4244                                         }
4245         
4246                                         /*
4247                                          * We've already processed his first
4248                                          * ack.  In just about all cases that
4249                                          * will have set max_window.  This is
4250                                          * to protect us against the possibility
4251                                          * that the initial window he sent was 0.
4252                                          * This must occur after tcp_options, which
4253                                          * sets sk->mtu.
4254                                          */
4255                                         if (sk->max_window == 0) 
4256                                         {
4257                                                 sk->max_window = 32;
4258                                                 sk->mss = min(sk->max_window, sk->mtu);
4259                                         }
4260 
4261                                         /*
4262                                          * Now process the rest like we were
4263                                          * already in the established state.
4264                                          */
4265                                         if (th->urg) 
4266                                         {
4267                                                 if (tcp_urg(sk, th, saddr, len)) 
4268                                                 { 
4269                                                         kfree_skb(skb, FREE_READ);
4270                                                         release_sock(sk);
4271                                                         return(0);
4272                                                 }
4273                                         }
4274                                         if (tcp_data(skb, sk, saddr, len))
4275                                                 kfree_skb(skb, FREE_READ);
4276 
4277                                         if (th->fin)
4278                                                 tcp_fin(skb, sk, th, saddr, dev);
4279                                         release_sock(sk);
4280                                         return(0);
4281                         }
4282         
4283                         if (th->urg) 
4284                         {
4285                                 if (tcp_urg(sk, th, saddr, len)) 
4286                                 {
4287                                         kfree_skb(skb, FREE_READ);
4288                                         release_sock(sk);
4289                                         return(0);
4290                                 }
4291                         }
4292                         if (tcp_data(skb, sk, saddr, len)) 
4293                         {
4294                                 kfree_skb(skb, FREE_READ);
4295                                 release_sock(sk);
4296                                 return(0);
4297                         }
4298         
4299                         if (!th->fin) 
4300                         {
4301                                 release_sock(sk);
4302                                 return(0);
4303                         }
4304                         tcp_fin(skb, sk, th, saddr, dev);
4305                         release_sock(sk);
4306                         return(0);
4307         }
4308 }
4309 
4310 
4311 /*
4312  * This routine sends a packet with an out of date sequence
4313  * number. It assumes the other end will try to ack it.
4314  */
4315 
4316 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4317 {
4318         struct sk_buff *buff;
4319         struct tcphdr *t1;
4320         struct device *dev=NULL;
4321         int tmp;
4322 
4323         if (sk->zapped)
4324                 return; /* After a valid reset we can send no more */
4325 
4326         /*
4327          * Write data can still be transmitted/retransmitted in the
4328          * following states.  If any other state is encountered, return.
4329          */
4330 
4331         if (sk->state != TCP_ESTABLISHED && 
4332             sk->state != TCP_CLOSE_WAIT &&
4333             sk->state != TCP_FIN_WAIT1 && 
4334             sk->state != TCP_LAST_ACK &&
4335             sk->state != TCP_CLOSING
4336         ) {
4337                 return;
4338         }
4339 
4340         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4341         if (buff == NULL) 
4342                 return;
4343 
4344         buff->len = sizeof(struct tcphdr);
4345         buff->free = 1;
4346         buff->sk = sk;
4347         buff->localroute = sk->localroute;
4348 
4349         t1 = (struct tcphdr *) buff->data;
4350 
4351         /* Put in the IP header and routing stuff. */
4352         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4353                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4354         if (tmp < 0) 
4355         {
4356                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4357                 return;
4358         }
4359 
4360         buff->len += tmp;
4361         t1 = (struct tcphdr *)((char *)t1 +tmp);
4362 
4363         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4364 
4365         /*
4366          * Use a previous sequence.
4367          * This should cause the other end to send an ack.
4368          */
4369         t1->seq = htonl(sk->sent_seq-1);
4370         t1->ack = 1; 
4371         t1->res1= 0;
4372         t1->res2= 0;
4373         t1->rst = 0;
4374         t1->urg = 0;
4375         t1->psh = 0;
4376         t1->fin = 0;
4377         t1->syn = 0;
4378         t1->ack_seq = ntohl(sk->acked_seq);
4379         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
4380         t1->doff = sizeof(*t1)/4;
4381         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4382 
4383          /*     Send it and free it.
4384           *     This will prevent the timer from automatically being restarted.
4385           */
4386         sk->prot->queue_xmit(sk, dev, buff, 1);
4387         tcp_statistics.TcpOutSegs++;
4388 }
4389 
4390 void
4391 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4392 {
4393         if (sk->zapped)
4394                 return;         /* After a valid reset we can send no more */
4395 
4396         tcp_write_wakeup(sk);
4397 
4398         sk->backoff++;
4399         sk->rto = min(sk->rto << 1, 120*HZ);
4400         reset_timer (sk, TIME_PROBE0, sk->rto);
4401         sk->retransmits++;
4402         sk->prot->retransmits ++;
4403 }
4404 
4405 /*
4406  *      Socket option code for TCP. 
4407  */
4408   
4409 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4410 {
4411         int val,err;
4412 
4413         if(level!=SOL_TCP)
4414                 return ip_setsockopt(sk,level,optname,optval,optlen);
4415 
4416         if (optval == NULL) 
4417                 return(-EINVAL);
4418 
4419         err=verify_area(VERIFY_READ, optval, sizeof(int));
4420         if(err)
4421                 return err;
4422         
4423         val = get_fs_long((unsigned long *)optval);
4424 
4425         switch(optname)
4426         {
4427                 case TCP_MAXSEG:
4428 /*                      if(val<200||val>2048 || val>sk->mtu) */
4429 /*
4430  * values greater than interface MTU won't take effect.  however at
4431  * the point when this call is done we typically don't yet know
4432  * which interface is going to be used
4433  */
4434                         if(val<1||val>MAX_WINDOW)
4435                                 return -EINVAL;
4436                         sk->user_mss=val;
4437                         return 0;
4438                 case TCP_NODELAY:
4439                         sk->nonagle=(val==0)?0:1;
4440                         return 0;
4441                 default:
4442                         return(-ENOPROTOOPT);
4443         }
4444 }
4445 
4446 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4447 {
4448         int val,err;
4449 
4450         if(level!=SOL_TCP)
4451                 return ip_getsockopt(sk,level,optname,optval,optlen);
4452                         
4453         switch(optname)
4454         {
4455                 case TCP_MAXSEG:
4456                         val=sk->user_mss;
4457                         break;
4458                 case TCP_NODELAY:
4459                         val=sk->nonagle;        /* Until Johannes stuff is in */
4460                         break;
4461                 default:
4462                         return(-ENOPROTOOPT);
4463         }
4464         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4465         if(err)
4466                 return err;
4467         put_fs_long(sizeof(int),(unsigned long *) optlen);
4468 
4469         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4470         if(err)
4471                 return err;
4472         put_fs_long(val,(unsigned long *)optval);
4473 
4474         return(0);
4475 }       
4476 
4477 
4478 struct proto tcp_prot = {
4479         sock_wmalloc,
4480         sock_rmalloc,
4481         sock_wfree,
4482         sock_rfree,
4483         sock_rspace,
4484         sock_wspace,
4485         tcp_close,
4486         tcp_read,
4487         tcp_write,
4488         tcp_sendto,
4489         tcp_recvfrom,
4490         ip_build_header,
4491         tcp_connect,
4492         tcp_accept,
4493         ip_queue_xmit,
4494         tcp_retransmit,
4495         tcp_write_wakeup,
4496         tcp_read_wakeup,
4497         tcp_rcv,
4498         tcp_select,
4499         tcp_ioctl,
4500         NULL,
4501         tcp_shutdown,
4502         tcp_setsockopt,
4503         tcp_getsockopt,
4504         128,
4505         0,
4506         {NULL,},
4507         "TCP"
4508 };

/* [previous][next][first][last][top][bottom][index][help] */