root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. min
  2. tcp_select_window
  3. tcp_time_wait
  4. tcp_retransmit
  5. tcp_err
  6. tcp_readable
  7. tcp_select
  8. tcp_ioctl
  9. tcp_check
  10. tcp_send_check
  11. tcp_send_skb
  12. tcp_dequeue_partial
  13. tcp_send_partial
  14. tcp_enqueue_partial
  15. tcp_send_ack
  16. tcp_build_header
  17. tcp_write
  18. tcp_sendto
  19. tcp_read_wakeup
  20. cleanup_rbuf
  21. tcp_read_urg
  22. tcp_read
  23. tcp_shutdown
  24. tcp_recvfrom
  25. tcp_reset
  26. tcp_options
  27. default_mask
  28. tcp_conn_request
  29. tcp_close
  30. tcp_write_xmit
  31. sort_send
  32. tcp_ack
  33. tcp_data
  34. tcp_check_urg
  35. tcp_urg
  36. tcp_fin
  37. tcp_accept
  38. tcp_connect
  39. tcp_sequence
  40. tcp_clean_end
  41. tcp_rcv
  42. tcp_write_wakeup
  43. tcp_send_probe0
  44. tcp_setsockopt
  45. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *
  83  *
  84  * To Fix:
  85  *                      Possibly a problem with accept(). BSD accept never fails after
  86  *              it causes a select. Linux can - given the official select semantics I
  87  *              feel that _really_ it's the BSD network programs that are bust (notably
  88  *              inetd, which hangs occasionally because of this).
  89  *
  90  *                      Fast path the code. Two things here - fix the window calculation
  91  *              so it doesn't iterate over the queue, also spot packets with no funny
  92  *              options arriving in order and process directly.
  93  *
  94  *              This program is free software; you can redistribute it and/or
  95  *              modify it under the terms of the GNU General Public License
  96  *              as published by the Free Software Foundation; either version
  97  *              2 of the License, or(at your option) any later version.
  98  *
  99  * Description of States:
 100  *
 101  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 102  *
 103  *      TCP_SYN_RECV            received a connection request, sent ack,
 104  *                              waiting for final ack in three-way handshake.
 105  *
 106  *      TCP_ESTABLISHED         connection established
 107  *
 108  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 109  *                              transmission of remaining buffered data
 110  *
 111  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 112  *                              to shutdown
 113  *
 114  *      TCP_CLOSING             both sides have shutdown but we still have
 115  *                              data we have to finish sending
 116  *
 117  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 118  *                              closed, can only be entered from FIN_WAIT2
 119  *                              or CLOSING.  Required because the other end
 120  *                              may not have gotten our last ACK causing it
 121  *                              to retransmit the data packet (which we ignore)
 122  *
 123  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 124  *                              us to finish writing our data and to shutdown
 125  *                              (we have to close() to move on to LAST_ACK)
 126  *
 127  *      TCP_LAST_ACK            our side has shutdown after remote has
 128  *                              shutdown.  There may still be data in our
 129  *                              buffer that we have to finish sending
 130  *              
 131  *      TCP_CLOSED              socket is finished
 132  */
 133 #include <linux/types.h>
 134 #include <linux/sched.h>
 135 #include <linux/mm.h>
 136 #include <linux/string.h>
 137 #include <linux/socket.h>
 138 #include <linux/sockios.h>
 139 #include <linux/termios.h>
 140 #include <linux/in.h>
 141 #include <linux/fcntl.h>
 142 #include <linux/inet.h>
 143 #include <linux/netdevice.h>
 144 #include "snmp.h"
 145 #include "ip.h"
 146 #include "protocol.h"
 147 #include "icmp.h"
 148 #include "tcp.h"
 149 #include <linux/skbuff.h>
 150 #include "sock.h"
 151 #include "route.h"
 152 #include <linux/errno.h>
 153 #include <linux/timer.h>
 154 #include <asm/system.h>
 155 #include <asm/segment.h>
 156 #include <linux/mm.h>
 157 
 /* Build switch: when defined, the fast-path hit/miss counters below are compiled in. */
 158 #define TCP_FASTPATH
 159 
 /*
  * NOTE(review): SEQ_TICK and seq_offset are not referenced in the part of
  * the file visible here; presumably they feed initial sequence number
  * selection - confirm against the connect/accept code.
  */
 160 #define SEQ_TICK 3
 161 unsigned long seq_offset;
 /* TCP counters; routines in this file bump fields such as TcpAttemptFails and TcpOutSegs. */
 162 struct tcp_mib  tcp_statistics;
 163 
 164 #ifdef TCP_FASTPATH
 /*
  * Receive-path counters for the fast-path work, all starting at zero.
  * NOTE(review): they are updated outside the portion of the file seen
  * here; the exact meaning of hit1 versus hit2 needs confirming there.
  */
 165 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 166 #endif
 167 
 168 
 /*
  * Return the smaller of two unsigned values (the second one when they
  * are equal).  The result is handed back as a signed int, which is what
  * the callers in this file expect.
  */
 static __inline__ int min(unsigned int a, unsigned int b)
 {
         return (a < b) ? a : b;
 }
 175 
 176 
 177 /* This routine picks a TCP windows for a socket based on
 178    the following constraints
 179    
 180    1. The window can never be shrunk once it is offered (RFC 793)
 181    2. We limit memory per socket
 182    
 183    For now we use NET2E3's heuristic of offering half the memory
 184    we have handy. All is not as bad as this seems however because
 185    of two things. Firstly we will bin packets even within the window
 186    in order to get the data we are waiting for into the memory limit.
 187    Secondly we bin common duplicate forms at receive time
 188    
 189    Better heuristics welcome
 190 */
 191    
 192 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 193 {
 194         int new_window = sk->prot->rspace(sk);
 195         
 196         if(sk->window_clamp)
 197                 new_window=min(sk->window_clamp,new_window);
 198 /*
 199  * two things are going on here.  First, we don't ever offer a
 200  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 201  * receiver side of SWS as specified in RFC1122.
 202  * Second, we always give them at least the window they
 203  * had before, in order to avoid retracting window.  This
 204  * is technically allowed, but RFC1122 advises against it and
 205  * in practice it causes trouble.
 206  */
 207         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 208                 return(sk->window);
 209         return(new_window);
 210 }
 211 
 212 /*
 213  *      Enter the time wait state. 
 214  */
 215 
 216 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 217 {
 218         sk->state = TCP_TIME_WAIT;
 219         sk->shutdown = SHUTDOWN_MASK;
 220         if (!sk->dead)
 221                 sk->state_change(sk);
 222         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 223 }
 224 
 225 /*
 226  *      A timer event has trigger a tcp retransmit timeout. The
 227  *      socket xmit queue is ready and set up to send. Because
 228  *      the ack receive code keeps the queue straight we do
 229  *      nothing clever here.
 230  */
 231 
 232 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 233 {
 234         if (all) 
 235         {
 236                 ip_retransmit(sk, all);
 237                 return;
 238         }
 239 
 240         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 241         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 242         sk->cong_count = 0;
 243 
 244         sk->cong_window = 1;
 245 
 246         /* Do the actual retransmit. */
 247         ip_retransmit(sk, all);
 248 }
 249 
 250 
 251 /*
 252  * This routine is called by the ICMP module when it gets some
 253  * sort of error condition.  If err < 0 then the socket should
 254  * be closed and the error returned to the user.  If err > 0
 255  * it's just the icmp type << 8 | icmp code.  After adjustment
 256  * header points to the first 8 bytes of the tcp header.  We need
 257  * to find the appropriate port.
 258  */
 259 
 260 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 261         unsigned long saddr, struct inet_protocol *protocol)
 262 {
 263         struct tcphdr *th;
 264         struct sock *sk;
 265         struct iphdr *iph=(struct iphdr *)header;
 266   
 267         header+=4*iph->ihl;
 268    
 269 
 270         th =(struct tcphdr *)header;
 271         sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 272 
 273         if (sk == NULL) 
 274                 return;
 275   
 276         if(err<0)
 277         {
 278                 sk->err = -err;
 279                 sk->error_report(sk);
 280                 return;
 281         }
 282 
 283         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 284         {
 285                 /*
 286                  * FIXME:
 287                  * For now we will just trigger a linear backoff.
 288                  * The slow start code should cause a real backoff here.
 289                  */
 290                 if (sk->cong_window > 4)
 291                         sk->cong_window--;
 292                 return;
 293         }
 294 
 295 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 296 
 297         /*
 298          * If we've already connected we will keep trying
 299          * until we time out, or the user gives up.
 300          */
 301 
 302         if (icmp_err_convert[err & 0xff].fatal) 
 303         {
 304                 if (sk->state == TCP_SYN_SENT) 
 305                 {
 306                         tcp_statistics.TcpAttemptFails++;
 307                         sk->state = TCP_CLOSE;
 308                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 309                 }
 310                 sk->err = icmp_err_convert[err & 0xff].errno;           
 311         }
 312         return;
 313 }
 314 
 315 
 316 /*
 317  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 318  *      in the received data queue (ie a frame missing that needs sending to us)
 319  */
 320 
 321 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 322 {
 323         unsigned long counted;
 324         unsigned long amount;
 325         struct sk_buff *skb;
 326         int sum;
 327         unsigned long flags;
 328 
 329         if(sk && sk->debug)
 330                 printk("tcp_readable: %p - ",sk);
 331 
 332         save_flags(flags);
 333         cli();
 334         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 335         {
 336                 restore_flags(flags);
 337                 if(sk && sk->debug) 
 338                         printk("empty\n");
 339                 return(0);
 340         }
 341   
 342         counted = sk->copied_seq+1;     /* Where we are at the moment */
 343         amount = 0;
 344   
 345         /* Do until a push or until we are out of data. */
 346         do 
 347         {
 348                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 349                         break;
 350                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 351                 if (skb->h.th->syn)
 352                         sum++;
 353                 if (sum >= 0) 
 354                 {                                       /* Add it up, move on */
 355                         amount += sum;
 356                         if (skb->h.th->syn) 
 357                                 amount--;
 358                         counted += sum;
 359                 }
 360                 if (amount && skb->h.th->psh) break;
 361                 skb = skb->next;
 362         }
 363         while(skb != (struct sk_buff *)&sk->receive_queue);
 364 
 365         if (amount && !sk->urginline && sk->urg_data &&
 366             (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 367                 amount--;               /* don't count urg data */
 368         restore_flags(flags);
 369         if(sk->debug)
 370                 printk("got %lu bytes.\n",amount);
 371         return(amount);
 372 }
 373 
 374 
 375 /*
 376  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 377  *      listening socket has a receive queue of sockets to accept.
 378  */
 379 
 380 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 381 {
 382         sk->inuse = 1;
 383 
 384         switch(sel_type) 
 385         {
 386                 case SEL_IN:
 387                         if(sk->debug)
 388                                 printk("select in");
 389                         select_wait(sk->sleep, wait);
 390                         if(sk->debug)
 391                                 printk("-select out");
 392                         if (skb_peek(&sk->receive_queue) != NULL) 
 393                         {
 394                                 if (sk->state == TCP_LISTEN || tcp_readable(sk)) 
 395                                 {
 396                                         release_sock(sk);
 397                                         if(sk->debug)
 398                                                 printk("-select ok data\n");
 399                                         return(1);
 400                                 }
 401                         }
 402                         if (sk->err != 0)       /* Receiver error */
 403                         {
 404                                 release_sock(sk);
 405                                 if(sk->debug)
 406                                         printk("-select ok error");
 407                                 return(1);
 408                         }
 409                         if (sk->shutdown & RCV_SHUTDOWN) 
 410                         {
 411                                 release_sock(sk);
 412                                 if(sk->debug)
 413                                         printk("-select ok down\n");
 414                                 return(1);
 415                         } 
 416                         else 
 417                         {
 418                                 release_sock(sk);
 419                                 if(sk->debug)
 420                                         printk("-select fail\n");
 421                                 return(0);
 422                         }
 423                 case SEL_OUT:
 424                         select_wait(sk->sleep, wait);
 425                         if (sk->shutdown & SEND_SHUTDOWN) 
 426                         {
 427                                 /* FIXME: should this return an error? */
 428                                 release_sock(sk);
 429                                 return(0);
 430                         }
 431 
 432                         /*
 433                          * FIXME:
 434                          * Hack so it will probably be able to write
 435                          * something if it says it's ok to write.
 436                          */
 437                         
 438                         if (sk->prot->wspace(sk) >= sk->mss) 
 439                         {
 440                                 release_sock(sk);
 441                                 /* This should cause connect to work ok. */
 442                                 if (sk->state == TCP_SYN_RECV ||
 443                                     sk->state == TCP_SYN_SENT) return(0);
 444                                 return(1);
 445                         }
 446                         release_sock(sk);
 447                         return(0);
 448                 case SEL_EX:
 449                         select_wait(sk->sleep,wait);
 450                         if (sk->err || sk->urg_data) 
 451                         {
 452                                 release_sock(sk);
 453                                 return(1);
 454                         }
 455                         release_sock(sk);
 456                         return(0);
 457         }
 458 
 459         release_sock(sk);
 460         return(0);
 461 }
 462 
 463 
 464 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 465 {
 466         int err;
 467         switch(cmd) 
 468         {
 469 
 470                 case TIOCINQ:
 471 #ifdef FIXME    /* FIXME: */
 472                 case FIONREAD:
 473 #endif
 474                 {
 475                         unsigned long amount;
 476 
 477                         if (sk->state == TCP_LISTEN) 
 478                                 return(-EINVAL);
 479 
 480                         sk->inuse = 1;
 481                         amount = tcp_readable(sk);
 482                         release_sock(sk);
 483                         err=verify_area(VERIFY_WRITE,(void *)arg,
 484                                                    sizeof(unsigned long));
 485                         if(err)
 486                                 return err;
 487                         put_fs_long(amount,(unsigned long *)arg);
 488                         return(0);
 489                 }
 490                 case SIOCATMARK:
 491                 {
 492                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 493 
 494                         err = verify_area(VERIFY_WRITE,(void *) arg,
 495                                                   sizeof(unsigned long));
 496                         if (err)
 497                                 return err;
 498                         put_fs_long(answ,(int *) arg);
 499                         return(0);
 500                 }
 501                 case TIOCOUTQ:
 502                 {
 503                         unsigned long amount;
 504 
 505                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 506                         amount = sk->prot->wspace(sk);
 507                         err=verify_area(VERIFY_WRITE,(void *)arg,
 508                                                    sizeof(unsigned long));
 509                         if(err)
 510                                 return err;
 511                         put_fs_long(amount,(unsigned long *)arg);
 512                         return(0);
 513                 }
 514                 default:
 515                         return(-EINVAL);
 516         }
 517 }
 518 
 519 
 520 /*
 521  *      This routine computes a TCP checksum. 
 522  */
 523  
 /*
  * tcp_check - one's complement checksum over a TCP segment and its
  * pseudo header.
  *
  *   th     start of the TCP header; 'len' bytes starting here are summed
  *   len    number of bytes to sum (tcp_send_skb passes the size of the
  *          TCP header plus the data that follows it)
  *   saddr  source IP address; 0 selects ip_my_addr()
  *   daddr  destination IP address
  *
  * Returns the complement of the folded sum in the low 16 bits.  The
  * caller is expected to have zeroed th->check first when generating a
  * checksum (tcp_send_check does so).
  *
  * The work is done in two blocks of i386 inline assembly that pass the
  * running sum in %ebx.  The add-with-carry chains depend on the exact
  * instruction order (lodsl and loop leave the carry flag alone), so
  * the instructions must not be rearranged.
  */
 524 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 525           unsigned long saddr, unsigned long daddr)
 526 {     
 527         unsigned long sum;
 528    
 /* No source address supplied: fall back to our own address. */
 529         if (saddr == 0) saddr = ip_my_addr();
 530 
 531 /*
 532  * stupid, gcc complains when I use just one __asm__ block,
 533  * something about too many reloads, but this is just two
 534  * instructions longer than what I want
 535  */
 /*
  * Block 1 - pseudo header.  %ebx enters holding daddr; saddr (%ecx)
  * and the word built from the byte swapped length and the protocol
  * number (%edx) are added to it, with the final carry wrapped back in.
  */
 536         __asm__("
 537             addl %%ecx, %%ebx
 538             adcl %%edx, %%ebx
 539             adcl $0, %%ebx
 540             "
 541         : "=b"(sum)
 542         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 543         : "bx", "cx", "dx" );
 /*
  * Block 2 - segment.  %esi walks the 'len' bytes at th:
  *   label 1: eight 32-bit words (32 bytes) per loop iteration,
  *   label 3: the remaining whole 32-bit words ((len & 28) / 4),
  *   label 4: one 16-bit word when bit 1 of len is set,
  *   label 5: one final byte when bit 0 of len is set,
  *   label 6: fold the upper 16 bits of the sum into the lower 16.
  * %edx keeps a copy of len because %ecx is used as the loop counter.
  */
 544         __asm__("
 545             movl %%ecx, %%edx
 546             cld
 547             cmpl $32, %%ecx
 548             jb 2f
 549             shrl $5, %%ecx
 550             clc
 551 1:          lodsl
 552             adcl %%eax, %%ebx
 553             lodsl
 554             adcl %%eax, %%ebx
 555             lodsl
 556             adcl %%eax, %%ebx
 557             lodsl
 558             adcl %%eax, %%ebx
 559             lodsl
 560             adcl %%eax, %%ebx
 561             lodsl
 562             adcl %%eax, %%ebx
 563             lodsl
 564             adcl %%eax, %%ebx
 565             lodsl
 566             adcl %%eax, %%ebx
 567             loop 1b
 568             adcl $0, %%ebx
 569             movl %%edx, %%ecx
 570 2:          andl $28, %%ecx
 571             je 4f
 572             shrl $2, %%ecx
 573             clc
 574 3:          lodsl
 575             adcl %%eax, %%ebx
 576             loop 3b
 577             adcl $0, %%ebx
 578 4:          movl $0, %%eax
 579             testw $2, %%dx
 580             je 5f
 581             lodsw
 582             addl %%eax, %%ebx
 583             adcl $0, %%ebx
 584             movw $0, %%ax
 585 5:          test $1, %%edx
 586             je 6f
 587             lodsb
 588             addl %%eax, %%ebx
 589             adcl $0, %%ebx
 590 6:          movl %%ebx, %%eax
 591             shrl $16, %%eax
 592             addw %%ax, %%bx
 593             adcw $0, %%bx
 594             "
 595         : "=b"(sum)
 596         : "0"(sum), "c"(len), "S"(th)
 597         : "ax", "bx", "cx", "dx", "si" );
 598 
 599         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 600   
 601         return((~sum) & 0xffff);
 602 }
 603 
 604 
 605 
 606 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 607                 unsigned long daddr, int len, struct sock *sk)
 608 {
 609         th->check = 0;
 610         th->check = tcp_check(th, len, saddr, daddr);
 611         return;
 612 }
 613 
 614 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 615 {
 616         int size;
 617         struct tcphdr * th = skb->h.th;
 618 
 619         /* length of packet (not counting length of pre-tcp headers) */
 620         size = skb->len - ((unsigned char *) th - skb->data);
 621 
 622         /* sanity check it.. */
 623         if (size < sizeof(struct tcphdr) || size > skb->len) 
 624         {
 625                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 626                         skb, skb->data, th, skb->len);
 627                 kfree_skb(skb, FREE_WRITE);
 628                 return;
 629         }
 630 
 631         /* If we have queued a header size packet.. */
 632         if (size == sizeof(struct tcphdr)) 
 633         {
 634                 /* If its got a syn or fin its notionally included in the size..*/
 635                 if(!th->syn && !th->fin) 
 636                 {
 637                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 638                         kfree_skb(skb,FREE_WRITE);
 639                         return;
 640                 }
 641         }
 642 
 643         tcp_statistics.TcpOutSegs++;  
 644 
 645         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 646         if (after(skb->h.seq, sk->window_seq) ||
 647             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 648              sk->packets_out >= sk->cong_window) 
 649         {
 650                 /* checksum will be supplied by tcp_write_xmit.  So
 651                  * we shouldn't need to set it at all.  I'm being paraoid */
 652                 th->check = 0;
 653                 if (skb->next != NULL) 
 654                 {
 655                         printk("tcp_send_partial: next != NULL\n");
 656                         skb_unlink(skb);
 657                 }
 658                 skb_queue_tail(&sk->write_queue, skb);
 659                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 660                     sk->send_head == NULL &&
 661                     sk->ack_backlog == 0)
 662                         reset_timer(sk, TIME_PROBE0, sk->rto);
 663         } 
 664         else 
 665         {
 666                 th->ack_seq = ntohl(sk->acked_seq);
 667                 th->window = ntohs(tcp_select_window(sk));
 668 
 669                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 670 
 671                 sk->sent_seq = sk->write_seq;
 672                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 673         }
 674 }
 675 
 676 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 677 {
 678         struct sk_buff * skb;
 679         unsigned long flags;
 680 
 681         save_flags(flags);
 682         cli();
 683         skb = sk->partial;
 684         if (skb) 
 685         {
 686                 sk->partial = NULL;
 687                 del_timer(&sk->partial_timer);
 688         }
 689         restore_flags(flags);
 690         return skb;
 691 }
 692 
 693 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 694 {
 695         struct sk_buff *skb;
 696 
 697         if (sk == NULL)
 698                 return;
 699         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 700                 tcp_send_skb(sk, skb);
 701 }
 702 
 703 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 704 {
 705         struct sk_buff * tmp;
 706         unsigned long flags;
 707 
 708         save_flags(flags);
 709         cli();
 710         tmp = sk->partial;
 711         if (tmp)
 712                 del_timer(&sk->partial_timer);
 713         sk->partial = skb;
 714         sk->partial_timer.expires = HZ;
 715         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 716         sk->partial_timer.data = (unsigned long) sk;
 717         add_timer(&sk->partial_timer);
 718         restore_flags(flags);
 719         if (tmp)
 720                 tcp_send_skb(sk, tmp);
 721 }
 722 
 723 
 724 /*
 725  *      This routine sends an ack and also updates the window. 
 726  */
 727  
 728 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 729              struct sock *sk,
 730              struct tcphdr *th, unsigned long daddr)
 731 {
 732         struct sk_buff *buff;
 733         struct tcphdr *t1;
 734         struct device *dev = NULL;
 735         int tmp;
 736 
 737         if(sk->zapped)
 738                 return;         /* We have been reset, we may not send again */
 739         /*
 740          * We need to grab some memory, and put together an ack,
 741          * and then put it into the queue to be sent.
 742          */
 743 
 744         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 745         if (buff == NULL) 
 746         {
 747                 /* Force it to send an ack. */
 748                 sk->ack_backlog++;
 749                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 750                 {
 751                         reset_timer(sk, TIME_WRITE, 10);
 752                 }
 753                 return;
 754         }
 755 
 756         buff->len = sizeof(struct tcphdr);
 757         buff->sk = sk;
 758         buff->localroute = sk->localroute;
 759         t1 =(struct tcphdr *) buff->data;
 760 
 761         /* Put in the IP header and routing stuff. */
 762         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 763                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 764         if (tmp < 0) 
 765         {
 766                 buff->free=1;
 767                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 768                 return;
 769         }
 770         buff->len += tmp;
 771         t1 =(struct tcphdr *)((char *)t1 +tmp);
 772 
 773         /* FIXME: */
 774         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 775 
 776         /*
 777          *      Swap the send and the receive. 
 778          */
 779          
 780         t1->dest = th->source;
 781         t1->source = th->dest;
 782         t1->seq = ntohl(sequence);
 783         t1->ack = 1;
 784         sk->window = tcp_select_window(sk);
 785         t1->window = ntohs(sk->window);
 786         t1->res1 = 0;
 787         t1->res2 = 0;
 788         t1->rst = 0;
 789         t1->urg = 0;
 790         t1->syn = 0;
 791         t1->psh = 0;
 792         t1->fin = 0;
 793         if (ack == sk->acked_seq) 
 794         {
 795                 sk->ack_backlog = 0;
 796                 sk->bytes_rcv = 0;
 797                 sk->ack_timed = 0;
 798                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 799                                   && sk->timeout == TIME_WRITE) 
 800                 {
 801                         if(sk->keepopen)
 802                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 803                         else
 804                                 delete_timer(sk);
 805                 }
 806         }
 807         t1->ack_seq = ntohl(ack);
 808         t1->doff = sizeof(*t1)/4;
 809         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 810         if (sk->debug)
 811                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 812         tcp_statistics.TcpOutSegs++;
 813         sk->prot->queue_xmit(sk, dev, buff, 1);
 814 }
 815 
 816 
 817 /* 
 818  *      This routine builds a generic TCP header. 
 819  */
 820  
 821 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 822 {
 823 
 824         /* FIXME: want to get rid of this. */
 825         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 826         th->seq = htonl(sk->write_seq);
 827         th->psh =(push == 0) ? 1 : 0;
 828         th->doff = sizeof(*th)/4;
 829         th->ack = 1;
 830         th->fin = 0;
 831         sk->ack_backlog = 0;
 832         sk->bytes_rcv = 0;
 833         sk->ack_timed = 0;
 834         th->ack_seq = htonl(sk->acked_seq);
 835         sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 836         th->window = htons(sk->window);
 837 
 838         return(sizeof(*th));
 839 }
 840 
 841 /*
 842  *      This routine copies from a user buffer into a socket,
 843  *      and starts the transmit system.
      *
      *      sk        socket to send on; marked in use on entry and handed
      *                back with release_sock() before returning
      *      from      user-space source buffer (read with memcpy_fromfs)
      *      len       number of bytes the caller wants to send
      *      nonblock  when set, return -EAGAIN instead of sleeping if the
      *                connection is not yet up or no buffer memory is free
      *      flags     MSG_OOB marks the data urgent (URG bit and urg_ptr);
      *                MSG_DONTROUTE is or'ed into the buffer's localroute
      *
      *      Returns the number of bytes copied if any were copied.  When
      *      nothing was copied it returns a negative error instead:
      *      -sk->err (which is then cleared), -EPIPE, -EAGAIN, -ERESTARTSYS,
      *      or the negative value returned by the header builders.
 844  */
 845 
 846 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 847           int len, int nonblock, unsigned flags)
 848 {
 849         int copied = 0;
 850         int copy;
 851         int tmp;
 852         struct sk_buff *skb;
 853         struct sk_buff *send_tmp;
 854         unsigned char *buff;
 855         struct proto *prot;
 856         struct device *dev = NULL;
 857 
 858         sk->inuse=1;
 859         prot = sk->prot;
             /* One pass per buffer filled (or per retry after sleeping). */
 860         while(len > 0) 
 861         {
 862                 if (sk->err) 
 863                 {                       /* Stop on an error */
 864                         release_sock(sk);
 865                         if (copied) 
 866                                 return(copied);
 867                         tmp = -sk->err;
 868                         sk->err = 0;
 869                         return(tmp);
 870                 }
 871 
 872         /*
 873          *      Refuse to send once the sending side has been shut down.
 874          */
 875         
 876                 if (sk->shutdown & SEND_SHUTDOWN) 
 877                 {
 878                         release_sock(sk);
 879                         sk->err = EPIPE;
 880                         if (copied) 
 881                                 return(copied);
 882                         sk->err = 0;
 883                         return(-EPIPE);
 884                 }
 885 
 886 
 887         /* 
 888          *      Wait for a connection to finish.
 889          */
 890         
 891                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 892                 {
 893                         if (sk->err) 
 894                         {
 895                                 release_sock(sk);
 896                                 if (copied) 
 897                                         return(copied);
 898                                 tmp = -sk->err;
 899                                 sk->err = 0;
 900                                 return(tmp);
 901                         }
 902 
                             /* Not connecting either: the write can never succeed. */
 903                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 904                         {
 905                                 release_sock(sk);
 906                                 if (copied) 
 907                                         return(copied);
 908 
 909                                 if (sk->err) 
 910                                 {
 911                                         tmp = -sk->err;
 912                                         sk->err = 0;
 913                                         return(tmp);
 914                                 }
 915 
 916                                 if (sk->keepopen) 
 917                                 {
 918                                         send_sig(SIGPIPE, current, 0);
 919                                 }
 920                                 return(-EPIPE);
 921                         }
 922 
 923                         if (nonblock || copied) 
 924                         {
 925                                 release_sock(sk);
 926                                 if (copied) 
 927                                         return(copied);
 928                                 return(-EAGAIN);
 929                         }
 930 
                             /*
                              * Release the socket, then re-check the state with
                              * interrupts off before sleeping (presumably so a
                              * state change cannot slip in between the test and
                              * the sleep).
                              */
 931                         release_sock(sk);
 932                         cli();
 933                 
 934                         if (sk->state != TCP_ESTABLISHED &&
 935                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 936                         {
 937                                 interruptible_sleep_on(sk->sleep);
 938                                 if (current->signal & ~current->blocked) 
 939                                 {
                                             /* Socket was already released above. */
 940                                         sti();
 941                                         if (copied) 
 942                                                 return(copied);
 943                                         return(-ERESTARTSYS);
 944                                 }
 945                         }
 946                         sk->inuse = 1;
 947                         sti();
 948                 }
 949 
 950         /*
 951          * The following code can result in copy <= 0 if sk->mss is ever
 952          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 953          * sk->mtu is constant once SYN processing is finished.  I.e. we
 954          * had better not get here until we've seen his SYN and at least one
 955          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 956          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 957          * non-decreasing.  Note that any ioctl to set user_mss must be done
 958          * before the exchange of SYN's.  If the initial ack from the other
 959          * end has a window of 0, max_window and thus mss will both be 0.
 960          */
 961 
 962         /* 
 963          *      Now we need to check if we have a half built packet. 
 964          */
 965 
 966                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
 967                 {
 968                         int hdrlen;
 969 
 970                          /* IP header + TCP header */
 971                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 972                                  + sizeof(struct tcphdr);
 973         
 974                         /* Add more stuff to the end of skb->len */
 975                         if (!(flags & MSG_OOB)) 
 976                         {
                                     /* Room left up to one mss of payload, capped by len. */
 977                                 copy = min(sk->mss - (skb->len - hdrlen), len);
 978                                 /* FIXME: this is really a bug. */
 979                                 if (copy <= 0) 
 980                                 {
 981                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
 982                                         copy = 0;
 983                                 }
 984           
 985                                 memcpy_fromfs(skb->data + skb->len, from, copy);
 986                                 skb->len += copy;
 987                                 from += copy;
 988                                 copied += copy;
 989                                 len -= copy;
 990                                 sk->write_seq += copy;
 991                         }
                             /*
                              * Send now if the packet is full, the write is urgent,
                              * or nothing is in flight; otherwise keep it as the
                              * partial packet.
                              */
 992                         if ((skb->len - hdrlen) >= sk->mss ||
 993                                 (flags & MSG_OOB) || !sk->packets_out)
 994                                 tcp_send_skb(sk, skb);
 995                         else
 996                                 tcp_enqueue_partial(skb, sk);
 997                         continue;
 998                 }
 999 
1000         /*
1001          * We also need to worry about the window.
1002          * If window < 1/2 the maximum window we've seen from this
1003          *   host, don't use it.  This is sender side
1004          *   silly window prevention, as specified in RFC1122.
1005          *   (Note that this is different than earlier versions of
1006          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1007          *   use the whole MSS.  Since this results in the right
1008          *   edge of the packet being outside the window, it will
1009          *   be queued for later rather than sent.
1010          */
1011 
1012                 copy = sk->window_seq - sk->write_seq;
1013                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1014                         copy = sk->mss;
1015                 if (copy > len)
1016                         copy = len;
1017 
1018         /*
1019          *      We should really check the window here also. 
1020          */
1021          
1022                 send_tmp = NULL;
                     /*
                      * Less than a full segment and not urgent: allocate an
                      * mtu-sized buffer and remember it in send_tmp as a
                      * candidate for the partial queue.
                      */
1023                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1024                 {
1025                         /*
1026                          *      We will release the socket in case we sleep here. 
1027                          */
1028                         release_sock(sk);
1029                         /*
1030                          *      NB: following must be mtu, because mss can be increased.
1031                          *      mss is always <= mtu 
1032                          */
1033                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1034                         sk->inuse = 1;
1035                         send_tmp = skb;
1036                 } 
1037                 else 
1038                 {
1039                         /*
1040                          *      We will release the socket in case we sleep here. 
1041                          */
1042                         release_sock(sk);
1043                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1044                         sk->inuse = 1;
1045                 }
1046 
1047                 /*
1048                  *      If we didn't get any memory, we need to sleep. 
1049                  */
1050 
1051                 if (skb == NULL) 
1052                 {
1053                         if (nonblock /* || copied */) 
1054                         {
1055                                 release_sock(sk);
1056                                 if (copied) 
1057                                         return(copied);
1058                                 return(-EAGAIN);
1059                         }
1060 
1061                         /*
1062                          *      FIXME: here is another race condition. 
1063                          */
1064 
                             /* Snapshot wmem_alloc so a change across release_sock() is noticed. */
1065                         tmp = sk->wmem_alloc;
1066                         release_sock(sk);
1067                         cli();
1068                         /*
1069                          *      Again we will try to avoid it. 
1070                          */
1071                         if (tmp <= sk->wmem_alloc &&
1072                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1073                                 && sk->err == 0) 
1074                         {
1075                                 interruptible_sleep_on(sk->sleep);
1076                                 if (current->signal & ~current->blocked) 
1077                                 {
                                             /* Socket was already released above. */
1078                                         sti();
1079                                         if (copied) 
1080                                                 return(copied);
1081                                         return(-ERESTARTSYS);
1082                                 }
1083                         }
1084                         sk->inuse = 1;
1085                         sti();
1086                         continue;
1087                 }
1088 
1089                 skb->len = 0;
1090                 skb->sk = sk;
1091                 skb->free = 0;
1092                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1093         
1094                 buff = skb->data;
1095         
1096                 /*
1097                  * FIXME: we need to optimize this.
1098                  * Perhaps some hints here would be good.
1099                  */
1100                 
1101                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1102                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1103                 if (tmp < 0 ) 
1104                 {
1105                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1106                         release_sock(sk);
1107                         if (copied) 
1108                                 return(copied);
1109                         return(tmp);
1110                 }
1111                 skb->len += tmp;
1112                 skb->dev = dev;
1113                 buff += tmp;
1114                 skb->h.th =(struct tcphdr *) buff;
                     /*
                      * The push argument is len-copy, which is zero only when
                      * this segment takes the last of the caller's data;
                      * tcp_build_header() sets PSH when it is zero.
                      */
1115                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1116                 if (tmp < 0) 
1117                 {
1118                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1119                         release_sock(sk);
1120                         if (copied) 
1121                                 return(copied);
1122                         return(tmp);
1123                 }
1124 
1125                 if (flags & MSG_OOB) 
1126                 {
1127                         ((struct tcphdr *)buff)->urg = 1;
1128                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1129                 }
1130                 skb->len += tmp;
1131                 memcpy_fromfs(buff+tmp, from, copy);
1132 
1133                 from += copy;
1134                 copied += copy;
1135                 len -= copy;
1136                 skb->len += copy;
1137                 skb->free = 0;
1138                 sk->write_seq += copy;
1139         
                     /* A short packet is held back only while data is in flight. */
1140                 if (send_tmp != NULL && sk->packets_out) 
1141                 {
1142                         tcp_enqueue_partial(send_tmp, sk);
1143                         continue;
1144                 }
1145                 tcp_send_skb(sk, skb);
1146         }
1147         sk->err = 0;
1148 
1149 /*
1150  *      Nagles rule. Turn Nagle off with TCP_NODELAY for highly
1151  *      interactive fast network servers. It's meant to be on and
1152  *      it really improves the throughput though not the echo time
1153  *      on my slow slip link - Alan
1154  */
1155 
1156 /*
1157  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1158  */
1159  
1160         if(sk->partial && ((!sk->packets_out) 
1161      /* If not nagling we can send on the before case too.. */
1162               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1163         ))
1164                 tcp_send_partial(sk);
1165 
1166         release_sock(sk);
1167         return(copied);
1168 }
1169 
1170 
1171 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1172            int len, int nonblock, unsigned flags,
1173            struct sockaddr_in *addr, int addr_len)
1174 {
1175         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1176                 return -EINVAL;
1177         if (addr_len < sizeof(*addr)) 
1178                 return(-EINVAL);
1179         if (addr->sin_family && addr->sin_family != AF_INET) 
1180                 return(-EINVAL);
1181         if (addr->sin_port != sk->dummy_th.dest) 
1182                 return(-EISCONN);
1183         if (addr->sin_addr.s_addr != sk->daddr) 
1184                 return(-EISCONN);
1185         return(tcp_write(sk, from, len, nonblock, flags));
1186 }
1187 
1188 
1189 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1190 {
1191         int tmp;
1192         struct device *dev = NULL;
1193         struct tcphdr *t1;
1194         struct sk_buff *buff;
1195 
1196         if (!sk->ack_backlog) 
1197                 return;
1198 
1199         /*
1200          * FIXME: we need to put code here to prevent this routine from
1201          * being called.  Being called once in a while is ok, so only check
1202          * if this is the second time in a row.
1203          */
1204 
1205         /*
1206          * We need to grab some memory, and put together an ack,
1207          * and then put it into the queue to be sent.
1208          */
1209 
1210         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1211         if (buff == NULL) 
1212         {
1213                 /* Try again real soon. */
1214                 reset_timer(sk, TIME_WRITE, 10);
1215                 return;
1216         }
1217 
1218         buff->len = sizeof(struct tcphdr);
1219         buff->sk = sk;
1220         buff->localroute = sk->localroute;
1221         
1222         /*
1223          *      Put in the IP header and routing stuff. 
1224          */
1225 
1226         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1227                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1228         if (tmp < 0) 
1229         {
1230                 buff->free=1;
1231                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1232                 return;
1233         }
1234 
1235         buff->len += tmp;
1236         t1 =(struct tcphdr *)(buff->data +tmp);
1237 
1238         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1239         t1->seq = htonl(sk->sent_seq);
1240         t1->ack = 1;
1241         t1->res1 = 0;
1242         t1->res2 = 0;
1243         t1->rst = 0;
1244         t1->urg = 0;
1245         t1->syn = 0;
1246         t1->psh = 0;
1247         sk->ack_backlog = 0;
1248         sk->bytes_rcv = 0;
1249         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1250         t1->window = ntohs(sk->window);
1251         t1->ack_seq = ntohl(sk->acked_seq);
1252         t1->doff = sizeof(*t1)/4;
1253         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1254         sk->prot->queue_xmit(sk, dev, buff, 1);
1255         tcp_statistics.TcpOutSegs++;
1256 }
1257 
1258 
1259 /*
1260  *      FIXME:
1261  *      This routine frees used buffers.
1262  *      It should consider sending an ACK to let the
1263  *      other end know we now have a bigger window.
1264  */
1265 
1266 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1267 {
1268         unsigned long flags;
1269         unsigned long left;
1270         struct sk_buff *skb;
1271         unsigned long rspace;
1272 
1273         if(sk->debug)
1274                 printk("cleaning rbuf for sk=%p\n", sk);
1275   
1276         save_flags(flags);
1277         cli();
1278   
1279         left = sk->prot->rspace(sk);
1280  
1281         /*
1282          * We have to loop through all the buffer headers,
1283          * and try to free up all the space we can.
1284          */
1285 
1286         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1287         {
1288                 if (!skb->used) 
1289                         break;
1290                 skb_unlink(skb);
1291                 skb->sk = sk;
1292                 kfree_skb(skb, FREE_READ);
1293         }
1294 
1295         restore_flags(flags);
1296 
1297         /*
1298          * FIXME:
1299          * At this point we should send an ack if the difference
1300          * in the window, and the amount of space is bigger than
1301          * TCP_WINDOW_DIFF.
1302          */
1303 
1304         if(sk->debug)
1305                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1306                                             left);
1307         if ((rspace=sk->prot->rspace(sk)) != left) 
1308         {
1309                 /*
1310                  * This area has caused the most trouble.  The current strategy
1311                  * is to simply do nothing if the other end has room to send at
1312                  * least 3 full packets, because the ack from those will auto-
1313                  * matically update the window.  If the other end doesn't think
1314                  * we have much space left, but we have room for atleast 1 more
1315                  * complete packet than it thinks we do, we will send an ack
1316                  * immediatedly.  Otherwise we will wait up to .5 seconds in case
1317                  * the user reads some more.
1318                  */
1319                 sk->ack_backlog++;
1320         /*
1321          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1322          * if the other end is offering a window smaller than the agreed on MSS
1323          * (called sk->mtu here).  In theory there's no connection between send
1324          * and receive, and so no reason to think that they're going to send
1325          * small packets.  For the moment I'm using the hack of reducing the mss
1326          * only on the send side, so I'm putting mtu here.
1327          */
1328 
1329                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1330                 {
1331                         /* Send an ack right now. */
1332                         tcp_read_wakeup(sk);
1333                 } 
1334                 else 
1335                 {
1336                         /* Force it to send an ack soon. */
1337                         int was_active = del_timer(&sk->timer);
1338                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1339                         {
1340                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1341                         } 
1342                         else
1343                                 add_timer(&sk->timer);
1344                 }
1345         }
1346 } 
1347 
1348 
1349 /*
1350  *      Handle reading urgent data. 
1351  */
1352  
1353 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1354              unsigned char *to, int len, unsigned flags)
1355 {
1356         struct wait_queue wait = { current, NULL };
1357 
1358         while (len > 0) 
1359         {
1360                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1361                         return -EINVAL;
1362                 if (sk->urg_data & URG_VALID) 
1363                 {
1364                         char c = sk->urg_data;
1365                         if (!(flags & MSG_PEEK))
1366                                 sk->urg_data = URG_READ;
1367                         put_fs_byte(c, to);
1368                         return 1;
1369                 }
1370 
1371                 if (sk->err) 
1372                 {
1373                         int tmp = -sk->err;
1374                         sk->err = 0;
1375                         return tmp;
1376                 }
1377 
1378                 if (sk->state == TCP_CLOSE || sk->done) 
1379                 {
1380                         if (!sk->done) {
1381                                 sk->done = 1;
1382                                 return 0;
1383                         }
1384                         return -ENOTCONN;
1385                 }
1386 
1387                 if (sk->shutdown & RCV_SHUTDOWN) 
1388                 {
1389                         sk->done = 1;
1390                         return 0;
1391                 }
1392 
1393                 if (nonblock)
1394                         return -EAGAIN;
1395 
1396                 if (current->signal & ~current->blocked)
1397                         return -ERESTARTSYS;
1398 
1399                 current->state = TASK_INTERRUPTIBLE;
1400                 add_wait_queue(sk->sleep, &wait);
1401                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1402                     !(sk->shutdown & RCV_SHUTDOWN))
1403                         schedule();
1404                 remove_wait_queue(sk->sleep, &wait);
1405                 current->state = TASK_RUNNING;
1406         }
1407         return 0;
1408 }
1409 
1410 
1411 /*
1412  *      This routine copies from a sock struct into the user buffer. 
      *
      *      sk        socket to read from
      *      to        user-space destination buffer (written with memcpy_tofs)
      *      len       maximum number of bytes to copy
      *      nonblock  when set, return -EAGAIN instead of sleeping for data
      *      flags     MSG_OOB hands the request to tcp_read_urg();
      *                MSG_PEEK reads without advancing sk->copied_seq and
      *                without marking buffers used
      *
      *      Returns the number of bytes copied, or a negative error when
      *      nothing was copied: -ENOTCONN (listening socket, or a closed
      *      socket whose end-of-stream was already reported), -sk->err
      *      (then cleared), -EAGAIN or -ERESTARTSYS.  Returns 0 the first
      *      time a closed or receive-shutdown socket is read.
1413  */
1414  
1415 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1416         int len, int nonblock, unsigned flags)
1417 {
1418         struct wait_queue wait = { current, NULL };
1419         int copied = 0;
1420         unsigned long peek_seq;
1421         unsigned long *seq;
1422         unsigned long used;
1423 
1424         /* This error should be checked. */
1425         if (sk->state == TCP_LISTEN)
1426                 return -ENOTCONN;
1427 
1428         /* Urgent data needs to be handled specially. */
1429         if (flags & MSG_OOB)
1430                 return tcp_read_urg(sk, nonblock, to, len, flags);
1431 
             /*
              * seq points at the read position to advance: the socket's own
              * copied_seq normally, or a private copy when peeking.
              */
1432         peek_seq = sk->copied_seq;
1433         seq = &sk->copied_seq;
1434         if (flags & MSG_PEEK)
1435                 seq = &peek_seq;
1436 
1437         add_wait_queue(sk->sleep, &wait);
1438         sk->inuse = 1;
1439         while (len > 0) 
1440         {
1441                 struct sk_buff * skb;
1442                 unsigned long offset;
1443         
1444                 /*
1445                  * are we at urgent data? Stop if we have read anything.
1446                  */
1447                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1448                         break;
1449 
                     /* Set before scanning the queue; schedule() below relies on it. */
1450                 current->state = TASK_INTERRUPTIBLE;
1451 
                     /*
                      * Walk the receive queue for the buffer holding the next
                      * byte (sequence 1+*seq).  Buffers lying wholly before it
                      * are marked used unless peeking.
                      */
1452                 skb = skb_peek(&sk->receive_queue);
1453                 do 
1454                 {
1455                         if (!skb)
1456                                 break;
                             /* Next buffer starts beyond the byte we want: a gap. */
1457                         if (before(1+*seq, skb->h.th->seq))
1458                                 break;
1459                         offset = 1 + *seq - skb->h.th->seq;
                             /* A SYN takes a sequence number but carries no data byte. */
1460                         if (skb->h.th->syn)
1461                                 offset--;
1462                         if (offset < skb->len)
1463                                 goto found_ok_skb;
1464                         if (!(flags & MSG_PEEK))
1465                                 skb->used = 1;
1466                         skb = skb->next;
1467                 }
1468                 while (skb != (struct sk_buff *)&sk->receive_queue);
1469 
                     /* No data available right now: return what we have, if anything. */
1470                 if (copied)
1471                         break;
1472 
1473                 if (sk->err) 
1474                 {
1475                         copied = -sk->err;
1476                         sk->err = 0;
1477                         break;
1478                 }
1479 
1480                 if (sk->state == TCP_CLOSE) 
1481                 {
                             /* First read after close returns 0; later ones fail. */
1482                         if (!sk->done) 
1483                         {
1484                                 sk->done = 1;
1485                                 break;
1486                         }
1487                         copied = -ENOTCONN;
1488                         break;
1489                 }
1490 
1491                 if (sk->shutdown & RCV_SHUTDOWN) 
1492                 {
1493                         sk->done = 1;
1494                         break;
1495                 }
1496                         
1497                 if (nonblock) 
1498                 {
1499                         copied = -EAGAIN;
1500                         break;
1501                 }
1502 
                     /* Free consumed buffers and release the socket before sleeping. */
1503                 cleanup_rbuf(sk);
1504                 release_sock(sk);
1505                 schedule();
1506                 sk->inuse = 1;
1507 
1508                 if (current->signal & ~current->blocked) 
1509                 {
1510                         copied = -ERESTARTSYS;
1511                         break;
1512                 }
1513                 continue;
1514 
1515         found_ok_skb:
1516                 /* Ok so how much can we use ? */
1517                 used = skb->len - offset;
1518                 if (len < used)
1519                         used = len;
1520                 /* do we have urgent data here? */
1521                 if (sk->urg_data) 
1522                 {
1523                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1524                         if (urg_offset < used) 
1525                         {
1526                                 if (!urg_offset) 
1527                                 {
                                             /* At the urgent byte: skip it unless delivered inline. */
1528                                         if (!sk->urginline) 
1529                                         {
1530                                                 ++*seq;
1531                                                 offset++;
1532                                                 used--;
1533                                         }
1534                                 }
1535                                 else
                                             /* Stop this copy just short of the urgent byte. */
1536                                         used = urg_offset;
1537                         }
1538                 }
1539                 /* Copy it */
1540                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1541                         skb->h.th->doff*4 + offset, used);
1542                 copied += used;
1543                 len -= used;
1544                 to += used;
1545                 *seq += used;
                     /* Once the real read position has passed urg_seq, forget the urgent state. */
1546                 if (after(sk->copied_seq+1,sk->urg_seq))
1547                         sk->urg_data = 0;
                     /* Buffer fully consumed (and not peeking): let cleanup_rbuf free it. */
1548                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1549                         skb->used = 1;
1550         }
1551         remove_wait_queue(sk->sleep, &wait);
1552         current->state = TASK_RUNNING;
1553 
1554         /* Clean up data we have read: This will do ACK frames */
1555         cleanup_rbuf(sk);
1556         release_sock(sk);
1557         return copied;
1558 }
1559 
1560  
1561 /*
1562  *      Shutdown the sending side of a connection.
1563  */
1564 
1565 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1566 {
1567         struct sk_buff *buff;
1568         struct tcphdr *t1, *th;
1569         struct proto *prot;
1570         int tmp;
1571         struct device *dev = NULL;
1572 
1573         /*
1574          * We need to grab some memory, and put together a FIN,
1575          * and then put it into the queue to be sent.
1576          * FIXME:
1577          *
1578          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1579          *      Most of this is guesswork, so maybe it will work...
1580          */
1581 
1582         if (!(how & SEND_SHUTDOWN)) 
1583                 return;
1584          
1585         /*
1586          *      If we've already sent a FIN, return. 
1587          */
1588          
1589         if (sk->state == TCP_FIN_WAIT1 ||
1590             sk->state == TCP_FIN_WAIT2 ||
1591             sk->state == TCP_CLOSING ||
1592             sk->state == TCP_LAST_ACK ||
1593             sk->state == TCP_TIME_WAIT
1594         ) 
1595         {
1596                 return;
1597         }
1598         sk->inuse = 1;
1599 
1600         /*
1601          * flag that the sender has shutdown
1602          */
1603 
1604         sk->shutdown |= SEND_SHUTDOWN;
1605 
1606         /*
1607          *  Clear out any half completed packets. 
1608          */
1609 
1610         if (sk->partial)
1611                 tcp_send_partial(sk);
1612 
1613         prot =(struct proto *)sk->prot;
1614         th =(struct tcphdr *)&sk->dummy_th;
1615         release_sock(sk); /* incase the malloc sleeps. */
1616         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1617         if (buff == NULL)
1618                 return;
1619         sk->inuse = 1;
1620 
1621         buff->sk = sk;
1622         buff->len = sizeof(*t1);
1623         buff->localroute = sk->localroute;
1624         t1 =(struct tcphdr *) buff->data;
1625 
1626         /*
1627          *      Put in the IP header and routing stuff. 
1628          */
1629 
1630         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1631                            IPPROTO_TCP, sk->opt,
1632                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1633         if (tmp < 0) 
1634         {
1635                 /*
1636                  *      Finish anyway, treat this as a send that got lost. 
1637                  *
1638                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1639                  *      written data to be completely acknowledged along
1640                  *      with an acknowledge to our FIN.
1641                  *
1642                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1643                  *      connection established.
1644                  */
1645                 buff->free=1;
1646                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1647 
1648                 if (sk->state == TCP_ESTABLISHED)
1649                         sk->state = TCP_FIN_WAIT1;
1650                 else if(sk->state == TCP_CLOSE_WAIT)
1651                         sk->state = TCP_LAST_ACK;
1652                 else
1653                         sk->state = TCP_FIN_WAIT2;
1654 
1655                 release_sock(sk);
1656                 return;
1657         }
1658 
1659         t1 =(struct tcphdr *)((char *)t1 +tmp);
1660         buff->len += tmp;
1661         buff->dev = dev;
1662         memcpy(t1, th, sizeof(*t1));
1663         t1->seq = ntohl(sk->write_seq);
1664         sk->write_seq++;
1665         buff->h.seq = sk->write_seq;
1666         t1->ack = 1;
1667         t1->ack_seq = ntohl(sk->acked_seq);
1668         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1669         t1->fin = 1;
1670         t1->rst = 0;
1671         t1->doff = sizeof(*t1)/4;
1672         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1673 
1674         /*
1675          * Can't just queue this up.
1676          * It should go at the end of the write queue.
1677          */
1678         
1679         if (skb_peek(&sk->write_queue) != NULL) 
1680         {
1681                 buff->free=0;
1682                 if (buff->next != NULL) 
1683                 {
1684                         printk("tcp_shutdown: next != NULL\n");
1685                         skb_unlink(buff);
1686                 }
1687                 skb_queue_tail(&sk->write_queue, buff);
1688         } 
1689         else 
1690         {
1691                 sk->sent_seq = sk->write_seq;
1692                 sk->prot->queue_xmit(sk, dev, buff, 0);
1693         }
1694 
1695         if (sk->state == TCP_ESTABLISHED) 
1696                 sk->state = TCP_FIN_WAIT1;
1697         else if (sk->state == TCP_CLOSE_WAIT)
1698                 sk->state = TCP_LAST_ACK;
1699         else
1700                 sk->state = TCP_FIN_WAIT2;
1701 
1702         release_sock(sk);
1703 }
1704 
1705 
1706 static int
1707 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1708              int to_len, int nonblock, unsigned flags,
1709              struct sockaddr_in *addr, int *addr_len)
1710 {
1711         int result;
1712   
1713         /* 
1714          *      Have to check these first unlike the old code. If 
1715          *      we check them after we lose data on an error
1716          *      which is wrong 
1717          */
1718 
1719         if(addr_len)
1720                 *addr_len = sizeof(*addr);
1721         result=tcp_read(sk, to, to_len, nonblock, flags);
1722 
1723         if (result < 0) 
1724                 return(result);
1725   
1726         if(addr)
1727         {
1728                 addr->sin_family = AF_INET;
1729                 addr->sin_port = sk->dummy_th.dest;
1730                 addr->sin_addr.s_addr = sk->daddr;
1731         }
1732         return(result);
1733 }
1734 
1735 
1736 /*
1737  *      This routine will send an RST to the other tcp. 
1738  */
1739  
1740 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1741           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1742 {
1743         struct sk_buff *buff;
1744         struct tcphdr *t1;
1745         int tmp;
1746         struct device *ndev=NULL;
1747   
1748 /*
1749  * We need to grab some memory, and put together an RST,
1750  * and then put it into the queue to be sent.
1751  */
1752 
1753         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1754         if (buff == NULL) 
1755                 return;
1756 
1757         buff->len = sizeof(*t1);
1758         buff->sk = NULL;
1759         buff->dev = dev;
1760         buff->localroute = 0;
1761 
1762         t1 =(struct tcphdr *) buff->data;
1763 
1764         /*
1765          *      Put in the IP header and routing stuff. 
1766          */
1767 
1768         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1769                            sizeof(struct tcphdr),tos,ttl);
1770         if (tmp < 0) 
1771         {
1772                 buff->free = 1;
1773                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1774                 return;
1775         }
1776 
1777         t1 =(struct tcphdr *)((char *)t1 +tmp);
1778         buff->len += tmp;
1779         memcpy(t1, th, sizeof(*t1));
1780 
1781         /*
1782          *      Swap the send and the receive. 
1783          */
1784 
1785         t1->dest = th->source;
1786         t1->source = th->dest;
1787         t1->rst = 1;  
1788         t1->window = 0;
1789   
1790         if(th->ack)
1791         {
1792                 t1->ack = 0;
1793                 t1->seq = th->ack_seq;
1794                 t1->ack_seq = 0;
1795         }
1796         else
1797         {
1798                 t1->ack = 1;
1799                 if(!th->syn)
1800                         t1->ack_seq=htonl(th->seq);
1801                 else
1802                         t1->ack_seq=htonl(th->seq+1);
1803                 t1->seq=0;
1804         }
1805 
1806         t1->syn = 0;
1807         t1->urg = 0;
1808         t1->fin = 0;
1809         t1->psh = 0;
1810         t1->doff = sizeof(*t1)/4;
1811         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1812         prot->queue_xmit(NULL, dev, buff, 1);
1813         tcp_statistics.TcpOutSegs++;
1814 }
1815 
1816 
1817 /*
1818  *      Look for tcp options. Parses everything but only knows about MSS.
1819  *      This routine is always called with the packet containing the SYN.
1820  *      However it may also be called with the ack to the SYN.  So you
1821  *      can't assume this is always the SYN.  It's always called after
1822  *      we have set up sk->mtu to our own MTU.
1823  */
1824  
1825 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1826 {
1827         unsigned char *ptr;
1828         int length=(th->doff*4)-sizeof(struct tcphdr);
1829         int mss_seen = 0;
1830     
1831         ptr = (unsigned char *)(th + 1);
1832   
1833         while(length>0)
1834         {
1835                 int opcode=*ptr++;
1836                 int opsize=*ptr++;
1837                 switch(opcode)
1838                 {
1839                         case TCPOPT_EOL:
1840                                 return;
1841                         case TCPOPT_NOP:
1842                                 length-=2;
1843                                 continue;
1844                         
1845                         default:
1846                                 if(opsize<=2)   /* Avoid silly options looping forever */
1847                                         return;
1848                                 switch(opcode)
1849                                 {
1850                                         case TCPOPT_MSS:
1851                                                 if(opsize==4 && th->syn)
1852                                                 {
1853                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1854                                                         mss_seen = 1;
1855                                                 }
1856                                                 break;
1857                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1858                                 }
1859                                 ptr+=opsize-2;
1860                                 length-=opsize;
1861                 }
1862         }
1863         if (th->syn) 
1864         {
1865                 if (! mss_seen)
1866                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1867         }
1868 #ifdef CONFIG_INET_PCTCP
1869         sk->mss = min(sk->max_window >> 1, sk->mtu);
1870 #else    
1871         sk->mss = min(sk->max_window, sk->mtu);
1872 #endif  
1873 }
1874 
/*
 *      Return the classful network mask for 'dst'.  The address is taken,
 *      and the mask returned, in network byte order.  Anything that is
 *      neither class A nor class B gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
        unsigned long host_addr = ntohl(dst);
        unsigned long net_mask;

        if (IN_CLASSA(host_addr))
                net_mask = IN_CLASSA_NET;
        else if (IN_CLASSB(host_addr))
                net_mask = IN_CLASSB_NET;
        else
                net_mask = IN_CLASSC_NET;

        return htonl(net_mask);
}
1884 
1885 /*
1886  *      This routine handles a connection request.
1887  *      It should make sure we haven't already responded.
1888  *      Because of the way BSD works, we have to send a syn/ack now.
1889  *      This also means it will be harder to close a socket which is
1890  *      listening.
1891  */
1892  
1893 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1894                  unsigned long daddr, unsigned long saddr,
1895                  struct options *opt, struct device *dev)
1896 {
1897         struct sk_buff *buff;
1898         struct tcphdr *t1;
1899         unsigned char *ptr;
1900         struct sock *newsk;
1901         struct tcphdr *th;
1902         struct device *ndev=NULL;
1903         int tmp;
1904         struct rtable *rt;
1905   
1906         th = skb->h.th;
1907 
1908         /* If the socket is dead, don't accept the connection. */
1909         if (!sk->dead) 
1910         {
1911                 sk->data_ready(sk,0);
1912         }
1913         else 
1914         {
1915                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1916                 tcp_statistics.TcpAttemptFails++;
1917                 kfree_skb(skb, FREE_READ);
1918                 return;
1919         }
1920 
1921         /*
1922          * Make sure we can accept more.  This will prevent a
1923          * flurry of syns from eating up all our memory.
1924          */
1925 
1926         if (sk->ack_backlog >= sk->max_ack_backlog) 
1927         {
1928                 tcp_statistics.TcpAttemptFails++;
1929                 kfree_skb(skb, FREE_READ);
1930                 return;
1931         }
1932 
1933         /*
1934          * We need to build a new sock struct.
1935          * It is sort of bad to have a socket without an inode attached
1936          * to it, but the wake_up's will just wake up the listening socket,
1937          * and if the listening socket is destroyed before this is taken
1938          * off of the queue, this will take care of it.
1939          */
1940 
1941         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1942         if (newsk == NULL) 
1943         {
1944                 /* just ignore the syn.  It will get retransmitted. */
1945                 tcp_statistics.TcpAttemptFails++;
1946                 kfree_skb(skb, FREE_READ);
1947                 return;
1948         }
1949 
1950         memcpy(newsk, sk, sizeof(*newsk));
1951         skb_queue_head_init(&newsk->write_queue);
1952         skb_queue_head_init(&newsk->receive_queue);
1953         newsk->send_head = NULL;
1954         newsk->send_tail = NULL;
1955         skb_queue_head_init(&newsk->back_log);
1956         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
1957         newsk->rto = TCP_TIMEOUT_INIT;
1958         newsk->mdev = 0;
1959         newsk->max_window = 0;
1960         newsk->cong_window = 1;
1961         newsk->cong_count = 0;
1962         newsk->ssthresh = 0;
1963         newsk->backoff = 0;
1964         newsk->blog = 0;
1965         newsk->intr = 0;
1966         newsk->proc = 0;
1967         newsk->done = 0;
1968         newsk->partial = NULL;
1969         newsk->pair = NULL;
1970         newsk->wmem_alloc = 0;
1971         newsk->rmem_alloc = 0;
1972         newsk->localroute = sk->localroute;
1973 
1974         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1975 
1976         newsk->err = 0;
1977         newsk->shutdown = 0;
1978         newsk->ack_backlog = 0;
1979         newsk->acked_seq = skb->h.th->seq+1;
1980         newsk->fin_seq = skb->h.th->seq;
1981         newsk->copied_seq = skb->h.th->seq;
1982         newsk->state = TCP_SYN_RECV;
1983         newsk->timeout = 0;
1984         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
1985         newsk->window_seq = newsk->write_seq;
1986         newsk->rcv_ack_seq = newsk->write_seq;
1987         newsk->urg_data = 0;
1988         newsk->retransmits = 0;
1989         newsk->destroy = 0;
1990         newsk->timer.data = (unsigned long)newsk;
1991         newsk->timer.function = &net_timer;
1992         newsk->dummy_th.source = skb->h.th->dest;
1993         newsk->dummy_th.dest = skb->h.th->source;
1994         
1995         /*
1996          *      Swap these two, they are from our point of view. 
1997          */
1998          
1999         newsk->daddr = saddr;
2000         newsk->saddr = daddr;
2001 
2002         put_sock(newsk->num,newsk);
2003         newsk->dummy_th.res1 = 0;
2004         newsk->dummy_th.doff = 6;
2005         newsk->dummy_th.fin = 0;
2006         newsk->dummy_th.syn = 0;
2007         newsk->dummy_th.rst = 0;        
2008         newsk->dummy_th.psh = 0;
2009         newsk->dummy_th.ack = 0;
2010         newsk->dummy_th.urg = 0;
2011         newsk->dummy_th.res2 = 0;
2012         newsk->acked_seq = skb->h.th->seq + 1;
2013         newsk->copied_seq = skb->h.th->seq;
2014 
2015         /*
2016          *      Grab the ttl and tos values and use them 
2017          */
2018 
2019         newsk->ip_ttl=sk->ip_ttl;
2020         newsk->ip_tos=skb->ip_hdr->tos;
2021 
2022         /*
2023          *      Use 512 or whatever user asked for 
2024          */
2025 
2026         /*
2027          *      Note use of sk->user_mss, since user has no direct access to newsk 
2028          */
2029 
2030         rt=ip_rt_route(saddr, NULL,NULL);
2031         
2032         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2033                 newsk->window_clamp = rt->rt_window;
2034         else
2035                 newsk->window_clamp = 0;
2036                 
2037         if (sk->user_mss)
2038                 newsk->mtu = sk->user_mss;
2039         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2040                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2041         else 
2042         {
2043 #ifdef CONFIG_INET_SNARL        /* Sub Nets ARe Local */
2044                 if ((saddr ^ daddr) & default_mask(saddr))
2045 #else
2046                 if ((saddr ^ daddr) & dev->pa_mask)
2047 #endif
2048                         newsk->mtu = 576 - HEADER_SIZE;
2049                 else
2050                         newsk->mtu = MAX_WINDOW;
2051         }
2052 
2053         /*
2054          *      But not bigger than device MTU 
2055          */
2056 
2057         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2058 
2059         /*
2060          *      This will min with what arrived in the packet 
2061          */
2062 
2063         tcp_options(newsk,skb->h.th);
2064 
2065         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2066         if (buff == NULL) 
2067         {
2068                 sk->err = -ENOMEM;
2069                 newsk->dead = 1;
2070                 release_sock(newsk);
2071                 kfree_skb(skb, FREE_READ);
2072                 tcp_statistics.TcpAttemptFails++;
2073                 return;
2074         }
2075   
2076         buff->len = sizeof(struct tcphdr)+4;
2077         buff->sk = newsk;
2078         buff->localroute = newsk->localroute;
2079 
2080         t1 =(struct tcphdr *) buff->data;
2081 
2082         /*
2083          *      Put in the IP header and routing stuff. 
2084          */
2085 
2086         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2087                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2088 
2089         /*
2090          *      Something went wrong. 
2091          */
2092 
2093         if (tmp < 0) 
2094         {
2095                 sk->err = tmp;
2096                 buff->free=1;
2097                 kfree_skb(buff,FREE_WRITE);
2098                 newsk->dead = 1;
2099                 release_sock(newsk);
2100                 skb->sk = sk;
2101                 kfree_skb(skb, FREE_READ);
2102                 tcp_statistics.TcpAttemptFails++;
2103                 return;
2104         }
2105 
2106         buff->len += tmp;
2107         t1 =(struct tcphdr *)((char *)t1 +tmp);
2108   
2109         memcpy(t1, skb->h.th, sizeof(*t1));
2110         buff->h.seq = newsk->write_seq;
2111         /*
2112          *      Swap the send and the receive. 
2113          */
2114         t1->dest = skb->h.th->source;
2115         t1->source = newsk->dummy_th.source;
2116         t1->seq = ntohl(newsk->write_seq++);
2117         t1->ack = 1;
2118         newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2119         newsk->sent_seq = newsk->write_seq;
2120         t1->window = ntohs(newsk->window);
2121         t1->res1 = 0;
2122         t1->res2 = 0;
2123         t1->rst = 0;
2124         t1->urg = 0;
2125         t1->psh = 0;
2126         t1->syn = 1;
2127         t1->ack_seq = ntohl(skb->h.th->seq+1);
2128         t1->doff = sizeof(*t1)/4+1;
2129         ptr =(unsigned char *)(t1+1);
2130         ptr[0] = 2;
2131         ptr[1] = 4;
2132         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2133         ptr[3] =(newsk->mtu) & 0xff;
2134 
2135         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2136         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2137 
2138         reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
2139         skb->sk = newsk;
2140 
2141         /*
2142          *      Charge the sock_buff to newsk. 
2143          */
2144          
2145         sk->rmem_alloc -= skb->mem_len;
2146         newsk->rmem_alloc += skb->mem_len;
2147         
2148         skb_queue_tail(&sk->receive_queue,skb);
2149         sk->ack_backlog++;
2150         release_sock(newsk);
2151         tcp_statistics.TcpOutSegs++;
2152 }
2153 
2154 
2155 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2156 {
2157         struct sk_buff *buff;
2158         int need_reset = 0;
2159         struct tcphdr *t1, *th;
2160         struct proto *prot;
2161         struct device *dev=NULL;
2162         int tmp;
2163 
2164         /*
2165          * We need to grab some memory, and put together a FIN, 
2166          * and then put it into the queue to be sent.
2167          */
2168         sk->inuse = 1;
2169         sk->keepopen = 1;
2170         sk->shutdown = SHUTDOWN_MASK;
2171 
2172         if (!sk->dead) 
2173                 sk->state_change(sk);
2174 
2175         /*
2176          *      We need to flush the recv. buffs. 
2177          */
2178 
2179         if (skb_peek(&sk->receive_queue) != NULL) 
2180         {
2181                 struct sk_buff *skb;
2182                 if(sk->debug)
2183                         printk("Clean rcv queue\n");
2184                 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2185                 {
2186                         /* The +1 is not needed because the FIN takes up sequence space and
2187                            is not read!!! */
2188                         if(skb->len > 0 && after(skb->h.th->seq + skb->len/* + 1 */ , sk->copied_seq))
2189                                 need_reset = 1;
2190                         kfree_skb(skb, FREE_READ);
2191                 }
2192                 if(sk->debug)
2193                         printk("Cleaned.\n");
2194         }
2195 
2196         /*
2197          *      Get rid off any half-completed packets. 
2198          */
2199          
2200         if (sk->partial) 
2201         {
2202                 tcp_send_partial(sk);
2203         }
2204 
2205         switch(sk->state) 
2206         {
2207                 case TCP_FIN_WAIT1:
2208                 case TCP_FIN_WAIT2:
2209                 case TCP_CLOSING:
2210                         /*
2211                          * These states occur when we have already closed out
2212                          * our end.  If there is no timeout, we do not do
2213                          * anything.  We may still be in the middle of sending
2214                          * the remainder of our buffer, for example...
2215                          * resetting the timer would be inappropriate.
2216                          *
2217                          * XXX if retransmit count reaches limit, is tcp_close()
2218                          * called with timeout == 1 ? if not, we need to fix that.
2219                          */
2220 #ifdef NOTDEF
2221                         /* 
2222                          *      Start a timer.
2223                          * original code was 4 * sk->rtt.  In converting to the
2224                          * new rtt representation, we can't quite use that.
2225                          * it seems to make most sense to  use the backed off value
2226                          */
2227                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2228 #endif
2229                         if (timeout) 
2230                                 tcp_time_wait(sk);
2231                         release_sock(sk);
2232                         return; /* break causes a double release - messy */
2233                 case TCP_TIME_WAIT:
2234                 case TCP_LAST_ACK:
2235                         /*
2236                          * A timeout from these states terminates the TCB.
2237                          */
2238                         if (timeout) 
2239                         {
2240                                 sk->state = TCP_CLOSE;
2241                         }
2242                         release_sock(sk);
2243                         return;
2244                 case TCP_LISTEN:
2245                         sk->state = TCP_CLOSE;
2246                         release_sock(sk);
2247                         return;
2248                 case TCP_CLOSE:
2249                         release_sock(sk);
2250                         return;
2251                 case TCP_CLOSE_WAIT:
2252                 case TCP_ESTABLISHED:
2253                 case TCP_SYN_SENT:
2254                 case TCP_SYN_RECV:
2255                         prot =(struct proto *)sk->prot;
2256                         th =(struct tcphdr *)&sk->dummy_th;
2257                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2258                         if (buff == NULL) 
2259                         {
2260                                 /* This will force it to try again later. */
2261                                 /* Or it would have if someone released the socket
2262                                    first. Anyway it might work now */
2263                                 release_sock(sk);
2264                                 if (sk->state != TCP_CLOSE_WAIT)
2265                                         sk->state = TCP_ESTABLISHED;
2266                                 reset_timer(sk, TIME_CLOSE, 100);
2267                                 return;
2268                         }
2269                         buff->sk = sk;
2270                         buff->free = 1;
2271                         buff->len = sizeof(*t1);
2272                         buff->localroute = sk->localroute;
2273                         t1 =(struct tcphdr *) buff->data;
2274         
2275                         /*
2276                          *      Put in the IP header and routing stuff. 
2277                          */
2278                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2279                                          IPPROTO_TCP, sk->opt,
2280                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2281                         if (tmp < 0) 
2282                         {
2283                                 sk->write_seq++;        /* Very important 8) */
2284                                 kfree_skb(buff,FREE_WRITE);
2285 
2286                                 /*
2287                                  * Enter FIN_WAIT1 to await completion of
2288                                  * written out data and ACK to our FIN.
2289                                  */
2290 
2291                                 if(sk->state==TCP_ESTABLISHED)
2292                                         sk->state=TCP_FIN_WAIT1;
2293                                 else
2294                                         sk->state=TCP_FIN_WAIT2;
2295                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2296                                 if(timeout)
2297                                         tcp_time_wait(sk);
2298 
2299                                 release_sock(sk);
2300                                 return;
2301                         }
2302 
2303                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2304                         buff->len += tmp;
2305                         buff->dev = dev;
2306                         memcpy(t1, th, sizeof(*t1));
2307                         t1->seq = ntohl(sk->write_seq);
2308                         sk->write_seq++;
2309                         buff->h.seq = sk->write_seq;
2310                         t1->ack = 1;
2311         
2312                         /* 
2313                          *      Ack everything immediately from now on. 
2314                          */
2315 
2316                         sk->delay_acks = 0;
2317                         t1->ack_seq = ntohl(sk->acked_seq);
2318                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2319                         t1->fin = 1;
2320                         t1->rst = need_reset;
2321                         t1->doff = sizeof(*t1)/4;
2322                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2323 
2324                         tcp_statistics.TcpOutSegs++;
2325         
2326                         if (skb_peek(&sk->write_queue) == NULL) 
2327                         {
2328                                 sk->sent_seq = sk->write_seq;
2329                                 prot->queue_xmit(sk, dev, buff, 0);
2330                         } 
2331                         else 
2332                         {
2333                                 reset_timer(sk, TIME_WRITE, sk->rto);
2334                                 if (buff->next != NULL) 
2335                                 {
2336                                         printk("tcp_close: next != NULL\n");
2337                                         skb_unlink(buff);
2338                                 }
2339                                 skb_queue_tail(&sk->write_queue, buff);
2340                         }
2341 
2342                         /*
2343                          * If established (normal close), enter FIN_WAIT1.
2344                          * If in CLOSE_WAIT, enter LAST_ACK
2345                          * If in CLOSING, remain in CLOSING
2346                          * otherwise enter FIN_WAIT2
2347                          */
2348 
2349                         if (sk->state == TCP_ESTABLISHED)
2350                             sk->state = TCP_FIN_WAIT1;
2351                         else if (sk->state == TCP_CLOSE_WAIT)
2352                             sk->state = TCP_LAST_ACK;
2353                         else if (sk->state != TCP_CLOSING)
2354                             sk->state = TCP_FIN_WAIT2;
2355         }
2356         release_sock(sk);
2357 }
2358 
2359 
2360 /*
2361  * This routine takes stuff off of the write queue,
2362  * and puts it in the xmit queue.
2363  */
2364 static void
2365 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2366 {
2367         struct sk_buff *skb;
2368 
2369         /*
2370          *      The bytes will have to remain here. In time closedown will
2371          *      empty the write queue and all will be happy 
2372          */
2373 
2374         if(sk->zapped)
2375                 return;
2376 
2377         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2378                 before(skb->h.seq, sk->window_seq + 1) &&
2379                 (sk->retransmits == 0 ||
2380                  sk->timeout != TIME_WRITE ||
2381                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2382                 && sk->packets_out < sk->cong_window) 
2383         {
2384                 IS_SKB(skb);
2385                 skb_unlink(skb);
2386                 /* See if we really need to send the packet. */
2387                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2388                 {
2389                         sk->retransmits = 0;
2390                         kfree_skb(skb, FREE_WRITE);
2391                         if (!sk->dead) 
2392                                 sk->write_space(sk);
2393                 } 
2394                 else
2395                 {
2396                         struct tcphdr *th;
2397                         struct iphdr *iph;
2398                         int size;
2399 /*
2400  * put in the ack seq and window at this point rather than earlier,
2401  * in order to keep them monotonic.  We really want to avoid taking
2402  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2403  * Ack and window will in general have changed since this packet was put
2404  * on the write queue.
2405  */
2406                         iph = (struct iphdr *)(skb->data +
2407                                                skb->dev->hard_header_len);
2408                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2409                         size = skb->len - (((unsigned char *) th) - skb->data);
2410                         
2411                         th->ack_seq = ntohl(sk->acked_seq);
2412                         th->window = ntohs(tcp_select_window(sk));
2413 
2414                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2415 
2416                         sk->sent_seq = skb->h.seq;
2417                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2418                 }
2419         }
2420 }
2421 
2422 
2423 /*
2424  *      This routine sorts the send list, and resets the
2425  *      sk->send_head and sk->send_tail pointers.
2426  */
2427 
2428 static void sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2429 {
2430         struct sk_buff *list = NULL;
2431         struct sk_buff *skb,*skb2,*skb3;
2432 
2433         for (skb = sk->send_head; skb != NULL; skb = skb2) 
2434         {
2435                 skb2 = skb->link3;
2436                 if (list == NULL || before (skb2->h.seq, list->h.seq)) 
2437                 {
2438                         skb->link3 = list;
2439                         sk->send_tail = skb;
2440                         list = skb;
2441                 }
2442                 else
2443                 {
2444                         for (skb3 = list; ; skb3 = skb3->link3) 
2445                         {
2446                                 if (skb3->link3 == NULL ||
2447                                     before(skb->h.seq, skb3->link3->h.seq))
2448                                 {
2449                                         skb->link3 = skb3->link3;
2450                                         skb3->link3 = skb;
2451                                         if (skb->link3 == NULL) 
2452                                                 sk->send_tail = skb;
2453                                         break;
2454                                 }
2455                         }
2456                 }
2457         }
2458         sk->send_head = list;
2459 }
2460   
2461 
2462 /*
2463  *      This routine deals with incoming acks, but not outgoing ones.
2464  */
2465 
2466 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2467 {
2468         unsigned long ack;
2469         int flag = 0;
2470 
2471         /* 
2472          * 1 - there was data in packet as well as ack or new data is sent or 
2473          *     in shutdown state
2474          * 2 - data from retransmit queue was acked and removed
2475          * 4 - window shrunk or data from retransmit queue was acked and removed
2476          */
2477 
2478         if(sk->zapped)
2479                 return(1);      /* Dead, cant ack any more so why bother */
2480 
2481         ack = ntohl(th->ack_seq);
2482         if (ntohs(th->window) > sk->max_window) 
2483         {
2484                 sk->max_window = ntohs(th->window);
2485 #ifdef CONFIG_INET_PCTCP
2486                 sk->mss = min(sk->max_window>>1, sk->mtu);
2487 #else
2488                 sk->mss = min(sk->max_window, sk->mtu);
2489 #endif  
2490         }
2491 
2492         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2493                 sk->retransmits = 0;
2494 
2495 #if 0
2496 /*
2497  *      Not quite clear why the +1 and -1 here, and why not +1 in next line 
2498  */
2499  
2500         if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) 
2501 #else   
2502         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2503 #endif  
2504         {
2505                 if(sk->debug)
2506                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2507                 if (after(ack, sk->sent_seq) ||
2508                    (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2509                 {
2510                         return(0);
2511                 }
2512                 if (sk->keepopen) 
2513                 {
2514                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2515                 }
2516                 return(1);
2517         }
2518 
2519         if (len != th->doff*4) 
2520                 flag |= 1;
2521 
2522         /* See if our window has been shrunk. */
2523 
2524         if (after(sk->window_seq, ack+ntohs(th->window))) 
2525         {
2526                 /*
2527                  * We may need to move packets from the send queue
2528                  * to the write queue, if the window has been shrunk on us.
2529                  * The RFC says you are not allowed to shrink your window
2530                  * like this, but if the other end does, you must be able
2531                  * to deal with it.
2532                  */
2533                 struct sk_buff *skb;
2534                 struct sk_buff *skb2;
2535                 struct sk_buff *wskb = NULL;
2536         
2537                 skb2 = sk->send_head;
2538                 sk->send_head = NULL;
2539                 sk->send_tail = NULL;
2540         
2541                 flag |= 4;
2542         
2543                 sk->window_seq = ack + ntohs(th->window);
2544                 cli();
2545                 while (skb2 != NULL) 
2546                 {
2547                         skb = skb2;
2548                         skb2 = skb->link3;
2549                         skb->link3 = NULL;
2550                         if (after(skb->h.seq, sk->window_seq)) 
2551                         {
2552                                 if (sk->packets_out > 0) 
2553                                         sk->packets_out--;
2554                                 /* We may need to remove this from the dev send list. */
2555                                 if (skb->next != NULL) 
2556                                 {
2557                                         skb_unlink(skb);                                
2558                                 }
2559                                 /* Now add it to the write_queue. */
2560                                 if (wskb == NULL)
2561                                         skb_queue_head(&sk->write_queue,skb);
2562                                 else
2563                                         skb_append(wskb,skb);
2564                                 wskb = skb;
2565                         } 
2566                         else 
2567                         {
2568                                 if (sk->send_head == NULL) 
2569                                 {
2570                                         sk->send_head = skb;
2571                                         sk->send_tail = skb;
2572                                 }
2573                                 else
2574                                 {
2575                                         sk->send_tail->link3 = skb;
2576                                         sk->send_tail = skb;
2577                                 }
2578                                 skb->link3 = NULL;
2579                         }
2580                 }
2581                 sti();
2582         }
2583 
2584         if (sk->send_tail == NULL || sk->send_head == NULL) 
2585         {
2586                 sk->send_head = NULL;
2587                 sk->send_tail = NULL;
2588                 sk->packets_out= 0;
2589         }
2590 
2591         sk->window_seq = ack + ntohs(th->window);
2592 
2593         /* We don't want too many packets out there. */
2594         if (sk->timeout == TIME_WRITE && 
2595                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2596         {
2597 /* 
2598  * This is Jacobson's slow start and congestion avoidance. 
2599  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2600  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2601  * counter and increment it once every cwnd times.  It's possible
2602  * that this should be done only if sk->retransmits == 0.  I'm
2603  * interpreting "new data is acked" as including data that has
2604  * been retransmitted but is just now being acked.
2605  */
2606                 if (sk->cong_window < sk->ssthresh)  
2607                   /* 
2608                    *    In "safe" area, increase
2609                    */
2610                         sk->cong_window++;
2611                 else 
2612                 {
2613                   /*
2614                    *    In dangerous area, increase slowly.  In theory this is
2615                    *    sk->cong_window += 1 / sk->cong_window
2616                    */
2617                         if (sk->cong_count >= sk->cong_window) 
2618                         {
2619                                 sk->cong_window++;
2620                                 sk->cong_count = 0;
2621                         }
2622                         else 
2623                                 sk->cong_count++;
2624                 }
2625         }
2626 
2627         sk->rcv_ack_seq = ack;
2628 
2629         /*
2630          * if this ack opens up a zero window, clear backoff.  It was
2631          * being used to time the probes, and is probably far higher than
2632          * it needs to be for normal retransmission.
2633          */
2634 
2635         if (sk->timeout == TIME_PROBE0) 
2636         {
2637                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2638                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2639                 {
2640                         sk->retransmits = 0;
2641                         sk->backoff = 0;
2642                   /*
2643                    *    Recompute rto from rtt.  this eliminates any backoff.
2644                    */
2645 
2646                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2647                         if (sk->rto > 120*HZ)
2648                                 sk->rto = 120*HZ;
2649                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2650                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2651                                                    .2 of a second is going to need huge windows (SIGH) */
2652                                 sk->rto = 20;
2653                 }
2654         }
2655 
2656   /* 
2657    *    See if we can take anything off of the retransmit queue.
2658    */
2659    
2660         while(sk->send_head != NULL) 
2661         {
2662                 /* Check for a bug. */
2663                 if (sk->send_head->link3 &&
2664                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2665                 {
2666                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2667                         sort_send(sk);
2668                 }
2669 
2670                 if (before(sk->send_head->h.seq, ack+1)) 
2671                 {
2672                         struct sk_buff *oskb;   
2673                         if (sk->retransmits) 
2674                         {       
2675                                 /*
2676                                  *      We were retransmitting.  don't count this in RTT est 
2677                                  */
2678                                 flag |= 2;
2679 
2680                                 /*
2681                                  * even though we've gotten an ack, we're still
2682                                  * retransmitting as long as we're sending from
2683                                  * the retransmit queue.  Keeping retransmits non-zero
2684                                  * prevents us from getting new data interspersed with
2685                                  * retransmissions.
2686                                  */
2687 
2688                                 if (sk->send_head->link3)
2689                                         sk->retransmits = 1;
2690                                 else
2691                                         sk->retransmits = 0;
2692                         }
2693                         /*
2694                          * Note that we only reset backoff and rto in the
2695                          * rtt recomputation code.  And that doesn't happen
2696                          * if there were retransmissions in effect.  So the
2697                          * first new packet after the retransmissions is
2698                          * sent with the backoff still in effect.  Not until
2699                          * we get an ack from a non-retransmitted packet do
2700                          * we reset the backoff and rto.  This allows us to deal
2701                          * with a situation where the network delay has increased
2702                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2703                          */
2704 
2705                         /*
2706                          *      We have one less packet out there. 
2707                          */
2708                          
2709                         if (sk->packets_out > 0) 
2710                                 sk->packets_out --;
2711                         /* 
2712                          *      Wake up the process, it can probably write more. 
2713                          */
2714                         if (!sk->dead) 
2715                                 sk->write_space(sk);
2716                         oskb = sk->send_head;
2717 
2718                         if (!(flag&2)) 
2719                         {
2720                                 long m;
2721         
2722                                 /*
2723                                  *      The following amusing code comes from Jacobson's
2724                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2725                                  *      are scaled versions of rtt and mean deviation.
2726                                  *      This is designed to be as fast as possible 
2727                                  *      m stands for "measurement".
2728                                  */
2729         
2730                                 m = jiffies - oskb->when;  /* RTT */
2731                                 if(m<=0)
2732                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2733                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2734                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2735                                 if (m < 0)
2736                                         m = -m;         /* m is now abs(error) */
2737                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2738                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2739         
2740                                 /*
2741                                  *      Now update timeout.  Note that this removes any backoff.
2742                                  */
2743                          
2744                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2745                                 if (sk->rto > 120*HZ)
2746                                         sk->rto = 120*HZ;
2747                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2748                                         sk->rto = 20;
2749                                 sk->backoff = 0;
2750                         }
2751                         flag |= (2|4);
2752                         cli();
2753                         oskb = sk->send_head;
2754                         IS_SKB(oskb);
2755                         sk->send_head = oskb->link3;
2756                         if (sk->send_head == NULL) 
2757                         {
2758                                 sk->send_tail = NULL;
2759                         }
2760 
2761                 /*
2762                  *      We may need to remove this from the dev send list. 
2763                  */
2764 
2765                         if (oskb->next)
2766                                 skb_unlink(oskb);
2767                         sti();
2768                         kfree_skb(oskb, FREE_WRITE); /* write. */
2769                         if (!sk->dead) 
2770                                 sk->write_space(sk);
2771                 }
2772                 else
2773                 {
2774                         break;
2775                 }
2776         }
2777 
2778         /*
2779          * Maybe we can take some stuff off of the write queue,
2780          * and put it onto the xmit queue.
2781          */
2782         if (skb_peek(&sk->write_queue) != NULL) 
2783         {
2784                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2785                         (sk->retransmits == 0 || 
2786                          sk->timeout != TIME_WRITE ||
2787                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2788                         && sk->packets_out < sk->cong_window) 
2789                 {
2790                         flag |= 1;
2791                         tcp_write_xmit(sk);
2792                 }
2793                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2794                         sk->send_head == NULL &&
2795                         sk->ack_backlog == 0 &&
2796                         sk->state != TCP_TIME_WAIT) 
2797                 {
2798                         reset_timer(sk, TIME_PROBE0, sk->rto);
2799                 }               
2800         }
2801         else
2802         {
2803                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2804                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2805                 {
2806                         if (!sk->dead)
2807                                 sk->write_space(sk);
2808                         if (sk->keepopen)
2809                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2810                         else
2811                                 delete_timer(sk);
2812                 }
2813                 else
2814                 {
2815                         if (sk->state != (unsigned char) sk->keepopen) 
2816                         {
2817                                 reset_timer(sk, TIME_WRITE, sk->rto);
2818                         }
2819                         if (sk->state == TCP_TIME_WAIT) 
2820                         {
2821                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2822                         }       
2823                 }
2824         }
2825 
2826         if (sk->packets_out == 0 && sk->partial != NULL &&
2827                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2828         {
2829                 flag |= 1;
2830                 tcp_send_partial(sk);
2831         }
2832 
2833         /*
2834          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2835          * we are now waiting for an acknowledge to our FIN.  The other end is
2836          * already in TIME_WAIT.
2837          *
2838          * Move to TCP_CLOSE on success.
2839          */
2840 
2841         if (sk->state == TCP_LAST_ACK) 
2842         {
2843                 if (!sk->dead)
2844                         sk->state_change(sk);
2845                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2846                 {
2847                         flag |= 1;
2848                         sk->state = TCP_CLOSE;
2849                         sk->shutdown = SHUTDOWN_MASK;
2850                 }
2851         }
2852 
2853         /*
2854          * Incomming ACK to a FIN we sent in the case of our initiating the close.
2855          *
2856          * Move to FIN_WAIT2 to await a FIN from the other end.
2857          */
2858 
2859         if (sk->state == TCP_FIN_WAIT1) 
2860         {
2861 
2862                 if (!sk->dead) 
2863                         sk->state_change(sk);
2864                 if (sk->rcv_ack_seq == sk->write_seq) 
2865                 {
2866                         flag |= 1;
2867                         if (sk->acked_seq != sk->fin_seq) 
2868                         {
2869                                 tcp_time_wait(sk);
2870                         }
2871                         else
2872                         {
2873                                 sk->shutdown = SHUTDOWN_MASK;
2874                                 sk->state = TCP_FIN_WAIT2;
2875                         }
2876                 }
2877         }
2878 
2879         /*
2880          *      Incoming ACK to a FIN we sent in the case of a simultanious close.
2881          *
2882          *      Move to TIME_WAIT
2883          */
2884 
2885         if (sk->state == TCP_CLOSING) 
2886         {
2887 
2888                 if (!sk->dead) 
2889                         sk->state_change(sk);
2890                 if (sk->rcv_ack_seq == sk->write_seq) 
2891                 {
2892                         flag |= 1;
2893                         tcp_time_wait(sk);
2894                 }
2895         }
2896 
2897         /*
2898          * I make no guarantees about the first clause in the following
2899          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2900          * what conditions "!flag" would be true.  However I think the rest
2901          * of the conditions would prevent that from causing any
2902          * unnecessary retransmission. 
2903          *   Clearly if the first packet has expired it should be 
2904          * retransmitted.  The other alternative, "flag&2 && retransmits", is
2905          * harder to explain:  You have to look carefully at how and when the
2906          * timer is set and with what timeout.  The most recent transmission always
2907          * sets the timer.  So in general if the most recent thing has timed
2908          * out, everything before it has as well.  So we want to go ahead and
2909          * retransmit some more.  If we didn't explicitly test for this
2910          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2911          * would not be true.  If you look at the pattern of timing, you can
2912          * show that rto is increased fast enough that the next packet would
2913          * almost never be retransmitted immediately.  Then you'd end up
2914          * waiting for a timeout to send each packet on the retranmission
2915          * queue.  With my implementation of the Karn sampling algorithm,
2916          * the timeout would double each time.  The net result is that it would
2917          * take a hideous amount of time to recover from a single dropped packet.
2918          * It's possible that there should also be a test for TIME_WRITE, but
2919          * I think as long as "send_head != NULL" and "retransmit" is on, we've
2920          * got to be in real retransmission mode.
2921          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2922          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2923          * As long as no further losses occur, this seems reasonable.
2924          */
2925         
2926         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2927                (((flag&2) && sk->retransmits) ||
2928                (sk->send_head->when + sk->rto < jiffies))) 
2929         {
2930                 ip_do_retransmit(sk, 1);
2931                 reset_timer(sk, TIME_WRITE, sk->rto);
2932         }
2933 
2934         return(1);
2935 }
2936 
2937 
2938 /*
2939  *      This routine handles the data.  If there is room in the buffer,
2940  *      it will have already been moved into it.  If there is no
2941  *      room, then we will just have to discard the packet.
2942  */
2943 
2944 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
2945          unsigned long saddr, unsigned short len)
2946 {
2947         struct sk_buff *skb1, *skb2;
2948         struct tcphdr *th;
2949         int dup_dumped=0;
2950         unsigned long new_seq;
2951 
2952         th = skb->h.th;
2953         skb->len = len -(th->doff*4);
2954 
2955         /* The bytes in the receive read/assembly queue has increased. Needed for the
2956            low memory discard algorithm */
2957            
2958         sk->bytes_rcv += skb->len;
2959         
2960         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
2961         {
2962                 /* 
2963                  *      Don't want to keep passing ack's back and forth. 
2964                  *      (someone sent us dataless, boring frame)
2965                  */
2966                 if (!th->ack)
2967                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
2968                 kfree_skb(skb, FREE_READ);
2969                 return(0);
2970         }
2971         
2972         /*
2973          *      We no longer have anyone receiving data on this connection.
2974          */
2975 
2976         if(sk->shutdown & RCV_SHUTDOWN)
2977         {
2978                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
2979                 
2980                 if(after(new_seq,sk->copied_seq+1))     /* If the right edge of this frame is after the last copied byte
2981                                                            then it contains data we will never touch. We send an RST to 
2982                                                            ensure the far end knows it never got to the application */
2983                 {
2984                         sk->acked_seq = new_seq + th->fin;
2985                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2986                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2987                         tcp_statistics.TcpEstabResets++;
2988                         sk->state = TCP_CLOSE;
2989                         sk->err = EPIPE;
2990                         sk->shutdown = SHUTDOWN_MASK;
2991                         kfree_skb(skb, FREE_READ);
2992                         if (!sk->dead)
2993                                 sk->state_change(sk);
2994                         return(0);
2995                 }
2996 #if 0           
2997                 /* Discard the frame here - we've already proved its a duplicate */
2998                 
2999                 kfree_skb(skb, FREE_READ);
3000                 return(0);                              
3001 #endif          
3002         }
3003         /*
3004          *      Now we have to walk the chain, and figure out where this one
3005          *      goes into it.  This is set up so that the last packet we received
3006          *      will be the first one we look at, that way if everything comes
3007          *      in order, there will be no performance loss, and if they come
3008          *      out of order we will be able to fit things in nicely.
3009          */
3010 
3011         /* 
3012          *      This should start at the last one, and then go around forwards.
3013          */
3014 
3015         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3016         {
3017                 skb_queue_head(&sk->receive_queue,skb);
3018                 skb1= NULL;
3019         } 
3020         else
3021         {
3022                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3023                 {
3024                         if(sk->debug)
3025                         {
3026                                 printk("skb1=%p :", skb1);
3027                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3028                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3029                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3030                                                 sk->acked_seq);
3031                         }
3032                         
3033                         /*
3034                          *      Optimisation: Duplicate frame or extension of previous frame from
3035                          *      same sequence point (lost ack case).
3036                          *      The frame contains duplicate data or replaces a previous frame
3037                          *      discard the previous frame (safe as sk->inuse is set) and put
3038                          *      the new one in its place.
3039                          */
3040                          
3041                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3042                         {
3043                                 skb_append(skb1,skb);
3044                                 skb_unlink(skb1);
3045                                 kfree_skb(skb1,FREE_READ);
3046                                 dup_dumped=1;
3047                                 skb1=NULL;
3048                                 break;
3049                         }
3050                         
3051                         /*
3052                          *      Found where it fits
3053                          */
3054                          
3055                         if (after(th->seq+1, skb1->h.th->seq))
3056                         {
3057                                 skb_append(skb1,skb);
3058                                 break;
3059                         }
3060                         
3061                         /*
3062                          *      See if we've hit the start. If so insert.
3063                          */
3064                         if (skb1 == skb_peek(&sk->receive_queue))
3065                         {
3066                                 skb_queue_head(&sk->receive_queue, skb);
3067                                 break;
3068                         }
3069                 }
3070         }
3071 
3072         /*
3073          *      Figure out what the ack value for this frame is
3074          */
3075          
3076         th->ack_seq = th->seq + skb->len;
3077         if (th->syn) 
3078                 th->ack_seq++;
3079         if (th->fin)
3080                 th->ack_seq++;
3081 
3082         if (before(sk->acked_seq, sk->copied_seq)) 
3083         {
3084                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3085                 sk->acked_seq = sk->copied_seq;
3086         }
3087 
3088         /*
3089          *      Now figure out if we can ack anything.
3090          */
3091 
3092         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3093         {
3094                 if (before(th->seq, sk->acked_seq+1)) 
3095                 {
3096                         int newwindow;
3097 
3098                         if (after(th->ack_seq, sk->acked_seq)) 
3099                         {
3100                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3101                                 if (newwindow < 0)
3102                                         newwindow = 0;  
3103                                 sk->window = newwindow;
3104                                 sk->acked_seq = th->ack_seq;
3105                         }
3106                         skb->acked = 1;
3107 
3108                         /* 
3109                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3110                          */
3111 
3112                         if (skb->h.th->fin) 
3113                         {
3114                                 if (!sk->dead) 
3115                                         sk->state_change(sk);
3116                                 sk->shutdown |= RCV_SHUTDOWN;
3117                         }
3118           
3119                         for(skb2 = skb->next;
3120                             skb2 != (struct sk_buff *)&sk->receive_queue;
3121                             skb2 = skb2->next) 
3122                         {
3123                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3124                                 {
3125                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3126                                         {
3127                                                 newwindow = sk->window -
3128                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3129                                                 if (newwindow < 0)
3130                                                         newwindow = 0;  
3131                                                 sk->window = newwindow;
3132                                                 sk->acked_seq = skb2->h.th->ack_seq;
3133                                         }
3134                                         skb2->acked = 1;
3135                                         /*
3136                                          *      When we ack the fin, we turn on
3137                                          *      the RCV_SHUTDOWN flag.
3138                                          */
3139                                         if (skb2->h.th->fin) 
3140                                         {
3141                                                 sk->shutdown |= RCV_SHUTDOWN;
3142                                                 if (!sk->dead)
3143                                                         sk->state_change(sk);
3144                                         }
3145 
3146                                         /*
3147                                          *      Force an immediate ack.
3148                                          */
3149                                          
3150                                         sk->ack_backlog = sk->max_ack_backlog;
3151                                 }
3152                                 else
3153                                 {
3154                                         break;
3155                                 }
3156                         }
3157 
3158                         /*
3159                          *      This also takes care of updating the window.
3160                          *      This if statement needs to be simplified.
3161                          */
3162                         if (!sk->delay_acks ||
3163                             sk->ack_backlog >= sk->max_ack_backlog || 
3164                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3165         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3166                         }
3167                         else 
3168                         {
3169                                 sk->ack_backlog++;
3170                                 if(sk->debug)
3171                                         printk("Ack queued.\n");
3172                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3173                         }
3174                 }
3175         }
3176 
3177         /*
3178          *      If we've missed a packet, send an ack.
3179          *      Also start a timer to send another.
3180          */
3181          
3182         if (!skb->acked) 
3183         {
3184         
3185         /*
3186          *      This is important.  If we don't have much room left,
3187          *      we need to throw out a few packets so we have a good
3188          *      window.  Note that mtu is used, not mss, because mss is really
3189          *      for the send side.  He could be sending us stuff as large as mtu.
3190          */
3191                  
3192                 while (sk->prot->rspace(sk) < sk->mtu) 
3193                 {
3194                         skb1 = skb_peek(&sk->receive_queue);
3195                         if (skb1 == NULL) 
3196                         {
3197                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3198                                 break;
3199                         }
3200 
3201                         /*
3202                          *      Don't throw out something that has been acked. 
3203                          */
3204                  
3205                         if (skb1->acked) 
3206                         {
3207                                 break;
3208                         }
3209                 
3210                         skb_unlink(skb1);
3211                         kfree_skb(skb1, FREE_READ);
3212                 }
3213                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3214                 sk->ack_backlog++;
3215                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3216         }
3217         else
3218         {
3219                 /* We missed a packet.  Send an ack to try to resync things. */
3220                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3221         }
3222 
3223         /*
3224          *      Now tell the user we may have some data. 
3225          */
3226          
3227         if (!sk->dead) 
3228         {
3229                 if(sk->debug)
3230                         printk("Data wakeup.\n");
3231                 sk->data_ready(sk,0);
3232         } 
3233         return(0);
3234 }
3235 
3236 
3237 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3238 {
3239         unsigned long ptr = ntohs(th->urg_ptr);
3240 
3241         if (ptr)
3242                 ptr--;
3243         ptr += th->seq;
3244 
3245         /* ignore urgent data that we've already seen and read */
3246         if (after(sk->copied_seq+1, ptr))
3247                 return;
3248 
3249         /* do we already have a newer (or duplicate) urgent pointer? */
3250         if (sk->urg_data && !after(ptr, sk->urg_seq))
3251                 return;
3252 
3253         /* tell the world about our new urgent pointer */
3254         if (sk->proc != 0) {
3255                 if (sk->proc > 0) {
3256                         kill_proc(sk->proc, SIGURG, 1);
3257                 } else {
3258                         kill_pg(-sk->proc, SIGURG, 1);
3259                 }
3260         }
3261         sk->urg_data = URG_NOTYET;
3262         sk->urg_seq = ptr;
3263 }
3264 
3265 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3266         unsigned long saddr, unsigned long len)
3267 {
3268         unsigned long ptr;
3269 
3270         /* check if we get a new urgent pointer */
3271         if (th->urg)
3272                 tcp_check_urg(sk,th);
3273 
3274         /* do we wait for any urgent data? */
3275         if (sk->urg_data != URG_NOTYET)
3276                 return 0;
3277 
3278         /* is the urgent pointer pointing into this packet? */
3279         ptr = sk->urg_seq - th->seq + th->doff*4;
3280         if (ptr >= len)
3281                 return 0;
3282 
3283         /* ok, got the correct packet, update info */
3284         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3285         if (!sk->dead)
3286                 sk->data_ready(sk,0);
3287         return 0;
3288 }
3289 
3290 
3291 /*
3292  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3293  *
3294  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3295  *  (and thence onto LAST-ACK and finally, CLOSED, we never enter
3296  *  TIME-WAIT)
3297  *
3298  *  If we are in FINWAIT-1, a received FIN indicates simultanious
3299  *  close and we go into CLOSING (and later onto TIME-WAIT)
3300  *
3301  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3302  *
3303  */
3304  
3305 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3306          unsigned long saddr, struct device *dev)
3307 {
3308         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3309 
3310         if (!sk->dead) 
3311         {
3312                 sk->state_change(sk);
3313         }
3314 
3315         switch(sk->state) 
3316         {
3317                 case TCP_SYN_RECV:
3318                 case TCP_SYN_SENT:
3319                 case TCP_ESTABLISHED:
3320                         /*
3321                          * move to CLOSE_WAIT, tcp_data() already handled
3322                          * sending the ack.
3323                          */
3324                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3325                         /*sk->fin_seq = th->seq+1;*/
3326                         tcp_statistics.TcpCurrEstab--;
3327                         sk->state = TCP_CLOSE_WAIT;
3328                         if (th->rst)
3329                                 sk->shutdown = SHUTDOWN_MASK;
3330                         break;
3331 
3332                 case TCP_CLOSE_WAIT:
3333                 case TCP_CLOSING:
3334                         /*
3335                          * received a retransmission of the FIN, do
3336                          * nothing.
3337                          */
3338                         break;
3339                 case TCP_TIME_WAIT:
3340                         /*
3341                          * received a retransmission of the FIN,
3342                          * restart the TIME_WAIT timer.
3343                          */
3344                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3345                         return(0);
3346                 case TCP_FIN_WAIT1:
3347                         /*
3348                          * This case occurs when a simultanious close
3349                          * happens, we must ack the received FIN and
3350                          * enter the CLOSING state.
3351                          *
3352                          * XXX timeout not set properly
3353                          */
3354 
3355                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3356                         /*sk->fin_seq = th->seq+1;*/
3357                         sk->state = TCP_CLOSING;
3358                         break;
3359                 case TCP_FIN_WAIT2:
3360                         /*
3361                          * received a FIN -- send ACK and enter TIME_WAIT
3362                          */
3363                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3364                         /*sk->fin_seq = th->seq+1;*/
3365                         sk->state = TCP_TIME_WAIT;
3366                         break;
3367                 case TCP_CLOSE:
3368                         /*
3369                          * already in CLOSE
3370                          */
3371                         break;
3372                 default:
3373                         sk->state = TCP_LAST_ACK;
3374         
3375                         /* Start the timers. */
3376                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3377                         return(0);
3378         }
3379         sk->ack_backlog++;
3380 
3381         return(0);
3382 }
3383 
3384 
3385 /* This will accept the next outstanding connection. */
3386 static struct sock *
3387 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3388 {
3389         struct sock *newsk;
3390         struct sk_buff *skb;
3391   
3392   /*
3393    * We need to make sure that this socket is listening,
3394    * and that it has something pending.
3395    */
3396 
3397         if (sk->state != TCP_LISTEN) 
3398         {
3399                 sk->err = EINVAL;
3400                 return(NULL); 
3401         }
3402 
3403         /* Avoid the race. */
3404         cli();
3405         sk->inuse = 1;
3406 
3407         while((skb = skb_dequeue(&sk->receive_queue)) == NULL) 
3408         {
3409                 if (flags & O_NONBLOCK) 
3410                 {
3411                         sti();
3412                         release_sock(sk);
3413                         sk->err = EAGAIN;
3414                         return(NULL);
3415                 }
3416 
3417                 release_sock(sk);
3418                 interruptible_sleep_on(sk->sleep);
3419                 if (current->signal & ~current->blocked) 
3420                 {
3421                         sti();
3422                         sk->err = ERESTARTSYS;
3423                         return(NULL);
3424                 }
3425                 sk->inuse = 1;
3426         }
3427         sti();
3428 
3429         /*
3430          *      Now all we need to do is return skb->sk. 
3431          */
3432 
3433         newsk = skb->sk;
3434 
3435         kfree_skb(skb, FREE_READ);
3436         sk->ack_backlog--;
3437         release_sock(sk);
3438         return(newsk);
3439 }
3440 
3441 
3442 /*
3443  *      This will initiate an outgoing connection. 
3444  */
3445  
3446 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3447 {
3448         struct sk_buff *buff;
3449         struct device *dev=NULL;
3450         unsigned char *ptr;
3451         int tmp;
3452         struct tcphdr *t1;
3453         struct rtable *rt;
3454 
3455         if (sk->state != TCP_CLOSE) 
3456                 return(-EISCONN);
3457 
3458         if (addr_len < 8) 
3459                 return(-EINVAL);
3460 
3461         if (usin->sin_family && usin->sin_family != AF_INET) 
3462                 return(-EAFNOSUPPORT);
3463 
3464         /*
3465          *      connect() to INADDR_ANY means loopback (BSD'ism).
3466          */
3467         
3468         if(usin->sin_addr.s_addr==INADDR_ANY)
3469                 usin->sin_addr.s_addr=ip_my_addr();
3470                   
3471         /*
3472          *      Don't want a TCP connection going to a broadcast address 
3473          */
3474 
3475         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3476         { 
3477                 return -ENETUNREACH;
3478         }
3479   
3480         /*
3481          *      Connect back to the same socket: Blows up so disallow it 
3482          */
3483 
3484         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3485                 return -EBUSY;
3486 
3487         sk->inuse = 1;
3488         sk->daddr = usin->sin_addr.s_addr;
3489         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3490         sk->window_seq = sk->write_seq;
3491         sk->rcv_ack_seq = sk->write_seq -1;
3492         sk->err = 0;
3493         sk->dummy_th.dest = usin->sin_port;
3494         release_sock(sk);
3495 
3496         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3497         if (buff == NULL) 
3498         {
3499                 return(-ENOMEM);
3500         }
3501         sk->inuse = 1;
3502         buff->len = 24;
3503         buff->sk = sk;
3504         buff->free = 1;
3505         buff->localroute = sk->localroute;
3506         
3507         t1 = (struct tcphdr *) buff->data;
3508 
3509         /*
3510          *      Put in the IP header and routing stuff. 
3511          */
3512          
3513         rt=ip_rt_route(sk->daddr, NULL, NULL);
3514         
3515 
3516         /*
3517          *      We need to build the routing stuff fromt the things saved in skb. 
3518          */
3519 
3520         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3521                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3522         if (tmp < 0) 
3523         {
3524                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3525                 release_sock(sk);
3526                 return(-ENETUNREACH);
3527         }
3528 
3529         buff->len += tmp;
3530         t1 = (struct tcphdr *)((char *)t1 +tmp);
3531 
3532         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3533         t1->seq = ntohl(sk->write_seq++);
3534         sk->sent_seq = sk->write_seq;
3535         buff->h.seq = sk->write_seq;
3536         t1->ack = 0;
3537         t1->window = 2;
3538         t1->res1=0;
3539         t1->res2=0;
3540         t1->rst = 0;
3541         t1->urg = 0;
3542         t1->psh = 0;
3543         t1->syn = 1;
3544         t1->urg_ptr = 0;
3545         t1->doff = 6;
3546         /* use 512 or whatever user asked for */
3547         
3548         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3549                 sk->window_clamp=rt->rt_window;
3550         else
3551                 sk->window_clamp=0;
3552 
3553         if (sk->user_mss)
3554                 sk->mtu = sk->user_mss;
3555         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3556                 sk->mtu = rt->rt_mss;
3557         else 
3558         {
3559 #ifdef CONFIG_INET_SNARL
3560                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3561 #else
3562                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3563 #endif
3564                         sk->mtu = 576 - HEADER_SIZE;
3565                 else
3566                         sk->mtu = MAX_WINDOW;
3567         }
3568         /*
3569          *      but not bigger than device MTU 
3570          */
3571 
3572         if(sk->mtu <32)
3573                 sk->mtu = 32;   /* Sanity limit */
3574                 
3575         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3576         
3577         /*
3578          *      Put in the TCP options to say MTU. 
3579          */
3580 
3581         ptr = (unsigned char *)(t1+1);
3582         ptr[0] = 2;
3583         ptr[1] = 4;
3584         ptr[2] = (sk->mtu) >> 8;
3585         ptr[3] = (sk->mtu) & 0xff;
3586         tcp_send_check(t1, sk->saddr, sk->daddr,
3587                   sizeof(struct tcphdr) + 4, sk);
3588 
3589         /*
3590          *      This must go first otherwise a really quick response will get reset. 
3591          */
3592 
3593         sk->state = TCP_SYN_SENT;
3594 /*      sk->rtt = TCP_CONNECT_TIME;*/
3595         sk->rto = TCP_TIMEOUT_INIT;
3596         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3597         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3598 
3599         sk->prot->queue_xmit(sk, dev, buff, 0);  
3600         tcp_statistics.TcpActiveOpens++;
3601         tcp_statistics.TcpOutSegs++;
3602   
3603         release_sock(sk);
3604         return(0);
3605 }
3606 
3607 
3608 /* This functions checks to see if the tcp header is actually acceptable. */
3609 static int
3610 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3611              struct options *opt, unsigned long saddr, struct device *dev)
3612 {
3613         unsigned long next_seq;
3614 
3615         next_seq = len - 4*th->doff;
3616         if (th->fin)
3617                 next_seq++;
3618         /* if we have a zero window, we can't have any data in the packet.. */
3619         if (next_seq && !sk->window)
3620                 goto ignore_it;
3621         next_seq += th->seq;
3622 
3623         /*
3624          * This isn't quite right.  sk->acked_seq could be more recent
3625          * than sk->window.  This is however close enough.  We will accept
3626          * slightly more packets than we should, but it should not cause
3627          * problems unless someone is trying to forge packets.
3628          */
3629 
3630         /* have we already seen all of this packet? */
3631         if (!after(next_seq+1, sk->acked_seq))
3632                 goto ignore_it;
3633         /* or does it start beyond the window? */
3634         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3635                 goto ignore_it;
3636 
3637         /* ok, at least part of this packet would seem interesting.. */
3638         return 1;
3639 
3640 ignore_it:
3641         if (th->rst)
3642                 return 0;
3643 
3644         /*
3645          *      Send a reset if we get something not ours and we are
3646          *      unsynchronized. Note: We don't do anything to our end. We
3647          *      are just killing the bogus remote connection then we will
3648          *      connect again and it will work (with luck).
3649          */
3650          
3651         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3652                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3653                 return 1;
3654         }
3655 
3656         /* Try to resync things. */
3657         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3658         return 0;
3659 }
3660 
3661 
3662 #ifdef TCP_FASTPATH
3663 /*
3664  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3665  *      Yes if
3666  *      a) The queue is empty
3667  *      b) The last frame on the queue has the acked flag set
3668  */
3669 
3670 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3671 {
3672         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3673         if(skb==NULL || sk->receive_queue.prev->acked)
3674                 return 1;
3675 }
3676 
3677 #endif
3678 
3679 int
3680 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3681         unsigned long daddr, unsigned short len,
3682         unsigned long saddr, int redo, struct inet_protocol * protocol)
3683 {
3684         struct tcphdr *th;
3685         struct sock *sk;
3686 
3687         if (!skb) 
3688         {
3689                 return(0);
3690         }
3691 
3692         if (!dev) 
3693         {
3694                 return(0);
3695         }
3696   
3697         tcp_statistics.TcpInSegs++;
3698   
3699         if(skb->pkt_type!=PACKET_HOST)
3700         {
3701                 kfree_skb(skb,FREE_READ);
3702                 return(0);
3703         }
3704   
3705         th = skb->h.th;
3706 
3707         /*
3708          *      Find the socket.
3709          */
3710 
3711         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3712 
3713         /*
3714          *      If this socket has got a reset its to all intents and purposes 
3715          *      really dead 
3716          */
3717          
3718         if (sk!=NULL && sk->zapped)
3719                 sk=NULL;
3720 
3721         if (!redo) 
3722         {
3723                 if (tcp_check(th, len, saddr, daddr )) 
3724                 {
3725                         skb->sk = NULL;
3726                         kfree_skb(skb,FREE_READ);
3727                         /*
3728                          * We don't release the socket because it was
3729                          * never marked in use.
3730                          */
3731                         return(0);
3732                 }
3733                 th->seq = ntohl(th->seq);
3734 
3735                 /* See if we know about the socket. */
3736                 if (sk == NULL) 
3737                 {
3738                         if (!th->rst)
3739                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3740                         skb->sk = NULL;
3741                         kfree_skb(skb, FREE_READ);
3742                         return(0);
3743                 }
3744 
3745                 skb->len = len;
3746                 skb->sk = sk;
3747                 skb->acked = 0;
3748                 skb->used = 0;
3749                 skb->free = 0;
3750                 skb->saddr = daddr;
3751                 skb->daddr = saddr;
3752         
3753                 /* We may need to add it to the backlog here. */
3754                 cli();
3755                 if (sk->inuse) 
3756                 {
3757                         skb_queue_head(&sk->back_log, skb);
3758                         sti();
3759                         return(0);
3760                 }
3761                 sk->inuse = 1;
3762                 sti();
3763         }
3764         else
3765         {
3766                 if (!sk) 
3767                 {
3768                         return(0);
3769                 }
3770         }
3771 
3772 
3773         if (!sk->prot) 
3774         {
3775                 return(0);
3776         }
3777 
3778 
3779         /*
3780          *      Charge the memory to the socket. 
3781          */
3782          
3783         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3784         {
3785                 skb->sk = NULL;
3786                 kfree_skb(skb, FREE_READ);
3787                 release_sock(sk);
3788                 return(0);
3789         }
3790 
3791         sk->rmem_alloc += skb->mem_len;
3792 
3793 #ifdef TCP_FASTPATH
3794 /*
3795  *      Incoming data stream fastpath. 
3796  *
3797  *      We try to optimise two things.
3798  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3799  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3800  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3801  *
3802  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3803  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3804  *      speed although further optimizing here is possible.
3805  */
3806  
3807         /* Im trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3808         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3809         {       
3810                 /* Packets in order. Fits window */
3811                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3812                 {
3813                         /* Ack is harder */
3814                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3815                         {
3816                                 kfree_skb(skb, FREE_READ);
3817                                 release_sock(sk);
3818                                 return 0;
3819                         }
3820                         /*
3821                          *      Set up variables
3822                          */
3823                         skb->len -= (th->doff *4);
3824                         sk->bytes_rcv += skb->len;
3825                         tcp_rx_hit2++;
3826                         if(skb->len)
3827                         {
3828                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3829                                 if(sk->window >= skb->len)
3830                                         sk->window-=skb->len;                   /* We know its effect on the window */
3831                                 else
3832                                         sk->window=0;
3833                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3834                                 skb->acked=1;                           /* Guaranteed true */
3835                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3836                                         sk->bytes_rcv > sk->max_unacked)
3837                                 {
3838                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3839                                 }
3840                                 else
3841                                 {
3842                                         sk->ack_backlog++;
3843                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3844                                 }
3845                                 if(!sk->dead)
3846                                         sk->data_ready(sk,0);
3847                                 release_sock(sk);
3848                                 return 0;
3849                         }
3850                 }
3851                 /*
3852                  *      More generic case of arriving data stream in ESTABLISHED
3853                  */
3854                 tcp_rx_hit1++;
3855                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3856                 {
3857                         kfree_skb(skb, FREE_READ);
3858                         release_sock(sk);
3859                         return 0;
3860                 }
3861                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3862                 {
3863                         kfree_skb(skb, FREE_READ);
3864                         release_sock(sk);
3865                         return 0;
3866                 }
3867                 if(tcp_data(skb, sk, saddr, len))
3868                         kfree_skb(skb, FREE_READ);
3869                 release_sock(sk);
3870                 return 0;
3871         }
3872         tcp_rx_miss++;
3873 #endif  
3874 
3875         /*
3876          *      Now deal with all cases.
3877          */
3878          
3879         switch(sk->state) 
3880         {
3881         
3882                 /*
3883                  * This should close the system down if it's waiting
3884                  * for an ack that is never going to be sent.
3885                  */
3886                 case TCP_LAST_ACK:
3887                         if (th->rst) 
3888                         {
3889                                 sk->zapped=1;
3890                                 sk->err = ECONNRESET;
3891                                 sk->state = TCP_CLOSE;
3892                                 sk->shutdown = SHUTDOWN_MASK;
3893                                 if (!sk->dead) 
3894                                 {
3895                                         sk->state_change(sk);
3896                                 }
3897                                 kfree_skb(skb, FREE_READ);
3898                                 release_sock(sk);
3899                                 return(0);
3900                         }
3901 
3902                 case TCP_ESTABLISHED:
3903                 case TCP_CLOSE_WAIT:
3904                 case TCP_CLOSING:
3905                 case TCP_FIN_WAIT1:
3906                 case TCP_FIN_WAIT2:
3907                 case TCP_TIME_WAIT:
3908                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3909                         {
3910                                 kfree_skb(skb, FREE_READ);
3911                                 release_sock(sk);
3912                                 return(0);
3913                         }
3914 
3915                         if (th->rst) 
3916                         {
3917                                 tcp_statistics.TcpEstabResets++;
3918                                 tcp_statistics.TcpCurrEstab--;
3919                                 sk->zapped=1;
3920                                 /* This means the thing should really be closed. */
3921                                 sk->err = ECONNRESET;
3922                                 if (sk->state == TCP_CLOSE_WAIT) 
3923                                 {
3924                                         sk->err = EPIPE;
3925                                 }
3926         
3927                                 /*
3928                                  * A reset with a fin just means that
3929                                  * the data was not all read.
3930                                  */
3931                                 sk->state = TCP_CLOSE;
3932                                 sk->shutdown = SHUTDOWN_MASK;
3933                                 if (!sk->dead) 
3934                                 {
3935                                         sk->state_change(sk);
3936                                 }
3937                                 kfree_skb(skb, FREE_READ);
3938                                 release_sock(sk);
3939                                 return(0);
3940                         }
3941                         if (th->syn) 
3942                         {
3943                                 tcp_statistics.TcpCurrEstab--;
3944                                 tcp_statistics.TcpEstabResets++;
3945                                 sk->err = ECONNRESET;
3946                                 sk->state = TCP_CLOSE;
3947                                 sk->shutdown = SHUTDOWN_MASK;
3948                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3949                                 if (!sk->dead) {
3950                                         sk->state_change(sk);
3951                                 }
3952                                 kfree_skb(skb, FREE_READ);
3953                                 release_sock(sk);
3954                                 return(0);
3955                         }
3956         
3957                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
3958                                 kfree_skb(skb, FREE_READ);
3959                                 release_sock(sk);
3960                                 return(0);
3961                         }
3962         
3963                         if (tcp_urg(sk, th, saddr, len)) {
3964                                 kfree_skb(skb, FREE_READ);
3965                                 release_sock(sk);
3966                                 return(0);
3967                         }
3968 
3969         
3970                         if (tcp_data(skb, sk, saddr, len)) {
3971                                 kfree_skb(skb, FREE_READ);
3972                                 release_sock(sk);
3973                                 return(0);
3974                         }       
3975 
3976                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
3977                                 kfree_skb(skb, FREE_READ);
3978                                 release_sock(sk);
3979                                 return(0);
3980                         }
3981         
3982                         release_sock(sk);
3983                         return(0);
3984                 
3985                 case TCP_CLOSE:
3986                         if (sk->dead || sk->daddr) {
3987                                 kfree_skb(skb, FREE_READ);
3988                                         release_sock(sk);
3989                                 return(0);
3990                         }
3991         
3992                         if (!th->rst) {
3993                                 if (!th->ack)
3994                                         th->ack_seq = 0;
3995                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3996                         }
3997                         kfree_skb(skb, FREE_READ);
3998                         release_sock(sk);
3999                                 return(0);
4000         
4001                 case TCP_LISTEN:
4002                         if (th->rst) {
4003                                 kfree_skb(skb, FREE_READ);
4004                                 release_sock(sk);
4005                                 return(0);
4006                         }
4007                         if (th->ack) {
4008                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4009                                 kfree_skb(skb, FREE_READ);
4010                                 release_sock(sk);
4011                                 return(0);
4012                         }
4013         
4014                         if (th->syn) 
4015                         {
4016                                 /*
4017                                  * Now we just put the whole thing including
4018                                  * the header and saddr, and protocol pointer
4019                                  * into the buffer.  We can't respond until the
4020                                  * user tells us to accept the connection.
4021                                  */
4022                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4023                                 release_sock(sk);
4024                                 return(0);
4025                         }
4026 
4027                         kfree_skb(skb, FREE_READ);
4028                         release_sock(sk);
4029                         return(0);
4030 
4031                 case TCP_SYN_RECV:
4032                         if (th->syn) {
4033                                 /* Probably a retransmitted syn */
4034                                 kfree_skb(skb, FREE_READ);
4035                                 release_sock(sk);
4036                                 return(0);
4037                         }
4038         
4039         
4040                 default:
4041                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4042                         {
4043                                 kfree_skb(skb, FREE_READ);
4044                                 release_sock(sk);
4045                                 return(0);
4046                         }
4047         
4048                 case TCP_SYN_SENT:
4049                         if (th->rst) 
4050                         {
4051                                 tcp_statistics.TcpAttemptFails++;
4052                                 sk->err = ECONNREFUSED;
4053                                 sk->state = TCP_CLOSE;
4054                                 sk->shutdown = SHUTDOWN_MASK;
4055                                 sk->zapped = 1;
4056                                 if (!sk->dead) 
4057                                 {
4058                                         sk->state_change(sk);
4059                                 }
4060                                 kfree_skb(skb, FREE_READ);
4061                                 release_sock(sk);
4062                                 return(0);
4063                         }
4064                         if (!th->ack) 
4065                         {
4066                                 if (th->syn) 
4067                                 {
4068                                         sk->state = TCP_SYN_RECV;
4069                                 }
4070                                 kfree_skb(skb, FREE_READ);
4071                                 release_sock(sk);
4072                                 return(0);
4073                         }
4074         
4075                         switch(sk->state) 
4076                         {
4077                                 case TCP_SYN_SENT:
4078                                         if (!tcp_ack(sk, th, saddr, len)) 
4079                                         {
4080                                                 tcp_statistics.TcpAttemptFails++;
4081                                                 tcp_reset(daddr, saddr, th,
4082                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4083                                                 kfree_skb(skb, FREE_READ);
4084                                                         release_sock(sk);
4085                                                 return(0);
4086                                         }
4087         
4088                                         /*
4089                                          * If the syn bit is also set, switch to
4090                                          * tcp_syn_recv, and then to established.
4091                                          */
4092                                         if (!th->syn) 
4093                                         {
4094                                                 kfree_skb(skb, FREE_READ);
4095                                                 release_sock(sk);
4096                                                 return(0);
4097                                         }
4098         
4099                                         /* Ack the syn and fall through. */
4100                                         sk->acked_seq = th->seq+1;
4101                                         sk->fin_seq = th->seq;
4102                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4103                                                 sk, th, sk->daddr);
4104                 
4105                                 case TCP_SYN_RECV:
4106                                         if (!tcp_ack(sk, th, saddr, len)) 
4107                                         {
4108                                                 tcp_statistics.TcpAttemptFails++;
4109                                                 tcp_reset(daddr, saddr, th,
4110                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4111                                                 kfree_skb(skb, FREE_READ);
4112                                                 release_sock(sk);
4113                                                 return(0);
4114                                         }
4115         
4116                                         tcp_statistics.TcpCurrEstab++;
4117                                         sk->state = TCP_ESTABLISHED;
4118         
4119                                         /*
4120                                          *      Now we need to finish filling out
4121                                          *      some of the tcp header.
4122                                          * 
4123                                          *      We need to check for mtu info. 
4124                                          */
4125                                         tcp_options(sk, th);
4126                                         sk->dummy_th.dest = th->source;
4127                                         sk->copied_seq = sk->acked_seq-1;
4128                                         if (!sk->dead) 
4129                                         {
4130                                                 sk->state_change(sk);
4131                                         }
4132         
4133                                         /*
4134                                          * We've already processed his first
4135                                          * ack.  In just about all cases that
4136                                          * will have set max_window.  This is
4137                                          * to protect us against the possibility
4138                                          * that the initial window he sent was 0.
4139                                          * This must occur after tcp_options, which
4140                                          * sets sk->mtu.
4141                                          */
4142                                         if (sk->max_window == 0) 
4143                                         {
4144                                                 sk->max_window = 32;
4145                                                 sk->mss = min(sk->max_window, sk->mtu);
4146                                         }
4147 
4148                                         /*
4149                                          * Now process the rest like we were
4150                                          * already in the established state.
4151                                          */
4152                                         if (th->urg) 
4153                                         {
4154                                                 if (tcp_urg(sk, th, saddr, len)) 
4155                                                 { 
4156                                                         kfree_skb(skb, FREE_READ);
4157                                                         release_sock(sk);
4158                                                         return(0);
4159                                                 }
4160                                         }
4161                                         if (tcp_data(skb, sk, saddr, len))
4162                                                 kfree_skb(skb, FREE_READ);
4163 
4164                                         if (th->fin)
4165                                                 tcp_fin(skb, sk, th, saddr, dev);
4166                                         release_sock(sk);
4167                                         return(0);
4168                         }
4169         
4170                         if (th->urg) 
4171                         {
4172                                 if (tcp_urg(sk, th, saddr, len)) 
4173                                 {
4174                                         kfree_skb(skb, FREE_READ);
4175                                         release_sock(sk);
4176                                         return(0);
4177                                 }
4178                         }
4179                         if (tcp_data(skb, sk, saddr, len)) 
4180                         {
4181                                 kfree_skb(skb, FREE_READ);
4182                                 release_sock(sk);
4183                                 return(0);
4184                         }
4185         
4186                         if (!th->fin) 
4187                         {
4188                                 release_sock(sk);
4189                                 return(0);
4190                         }
4191                         tcp_fin(skb, sk, th, saddr, dev);
4192                         release_sock(sk);
4193                         return(0);
4194         }
4195 }
4196 
4197 
4198 /*
4199  * This routine sends a packet with an out of date sequence
4200  * number. It assumes the other end will try to ack it.
4201  */
4202 
4203 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4204 {
4205         struct sk_buff *buff;
4206         struct tcphdr *t1;
4207         struct device *dev=NULL;
4208         int tmp;
4209 
4210         if (sk->zapped)
4211                 return; /* Afer a valid reset we can send no more */
4212 
4213         /*
4214          * Write data can still be transmitted/retransmitted in the
4215          * following states.  If any other state is encountered, return.
4216          */
4217 
4218         if (sk->state != TCP_ESTABLISHED && 
4219             sk->state != TCP_CLOSE_WAIT &&
4220             sk->state != TCP_FIN_WAIT1 && 
4221             sk->state != TCP_LAST_ACK &&
4222             sk->state != TCP_CLOSING
4223         ) {
4224                 return;
4225         }
4226 
4227         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4228         if (buff == NULL) 
4229                 return;
4230 
4231         buff->len = sizeof(struct tcphdr);
4232         buff->free = 1;
4233         buff->sk = sk;
4234         buff->localroute = sk->localroute;
4235 
4236         t1 = (struct tcphdr *) buff->data;
4237 
4238         /* Put in the IP header and routing stuff. */
4239         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4240                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4241         if (tmp < 0) 
4242         {
4243                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4244                 return;
4245         }
4246 
4247         buff->len += tmp;
4248         t1 = (struct tcphdr *)((char *)t1 +tmp);
4249 
4250         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4251 
4252         /*
4253          * Use a previous sequence.
4254          * This should cause the other end to send an ack.
4255          */
4256         t1->seq = htonl(sk->sent_seq-1);
4257         t1->ack = 1; 
4258         t1->res1= 0;
4259         t1->res2= 0;
4260         t1->rst = 0;
4261         t1->urg = 0;
4262         t1->psh = 0;
4263         t1->fin = 0;
4264         t1->syn = 0;
4265         t1->ack_seq = ntohl(sk->acked_seq);
4266         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
4267         t1->doff = sizeof(*t1)/4;
4268         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4269 
4270          /*     Send it and free it.
4271           *     This will prevent the timer from automatically being restarted.
4272           */
4273         sk->prot->queue_xmit(sk, dev, buff, 1);
4274         tcp_statistics.TcpOutSegs++;
4275 }
4276 
4277 void
4278 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4279 {
4280         if (sk->zapped)
4281                 return;         /* Afer a valid reset we can send no more */
4282 
4283         tcp_write_wakeup(sk);
4284 
4285         sk->backoff++;
4286         sk->rto = min(sk->rto << 1, 120*HZ);
4287         reset_timer (sk, TIME_PROBE0, sk->rto);
4288         sk->retransmits++;
4289         sk->prot->retransmits ++;
4290 }
4291 
4292 /*
4293  *      Socket option code for TCP. 
4294  */
4295   
4296 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4297 {
4298         int val,err;
4299 
4300         if(level!=SOL_TCP)
4301                 return ip_setsockopt(sk,level,optname,optval,optlen);
4302 
4303         if (optval == NULL) 
4304                 return(-EINVAL);
4305 
4306         err=verify_area(VERIFY_READ, optval, sizeof(int));
4307         if(err)
4308                 return err;
4309         
4310         val = get_fs_long((unsigned long *)optval);
4311 
4312         switch(optname)
4313         {
4314                 case TCP_MAXSEG:
4315 /*                      if(val<200||val>2048 || val>sk->mtu) */
4316 /*
4317  * values greater than interface MTU won't take effect.  however at
4318  * the point when this call is done we typically don't yet know
4319  * which interface is going to be used
4320  */
4321                         if(val<1||val>MAX_WINDOW)
4322                                 return -EINVAL;
4323                         sk->user_mss=val;
4324                         return 0;
4325                 case TCP_NODELAY:
4326                         sk->nonagle=(val==0)?0:1;
4327                         return 0;
4328                 default:
4329                         return(-ENOPROTOOPT);
4330         }
4331 }
4332 
4333 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4334 {
4335         int val,err;
4336 
4337         if(level!=SOL_TCP)
4338                 return ip_getsockopt(sk,level,optname,optval,optlen);
4339                         
4340         switch(optname)
4341         {
4342                 case TCP_MAXSEG:
4343                         val=sk->user_mss;
4344                         break;
4345                 case TCP_NODELAY:
4346                         val=sk->nonagle;        /* Until Johannes stuff is in */
4347                         break;
4348                 default:
4349                         return(-ENOPROTOOPT);
4350         }
4351         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4352         if(err)
4353                 return err;
4354         put_fs_long(sizeof(int),(unsigned long *) optlen);
4355 
4356         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4357         if(err)
4358                 return err;
4359         put_fs_long(val,(unsigned long *)optval);
4360 
4361         return(0);
4362 }       
4363 
4364 
4365 struct proto tcp_prot = {
4366         sock_wmalloc,
4367         sock_rmalloc,
4368         sock_wfree,
4369         sock_rfree,
4370         sock_rspace,
4371         sock_wspace,
4372         tcp_close,
4373         tcp_read,
4374         tcp_write,
4375         tcp_sendto,
4376         tcp_recvfrom,
4377         ip_build_header,
4378         tcp_connect,
4379         tcp_accept,
4380         ip_queue_xmit,
4381         tcp_retransmit,
4382         tcp_write_wakeup,
4383         tcp_read_wakeup,
4384         tcp_rcv,
4385         tcp_select,
4386         tcp_ioctl,
4387         NULL,
4388         tcp_shutdown,
4389         tcp_setsockopt,
4390         tcp_getsockopt,
4391         128,
4392         0,
4393         {NULL,},
4394         "TCP"
4395 };

/* [previous][next][first][last][top][bottom][index][help] */