root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_select_window
  3. tcp_time_wait
  4. tcp_retransmit
  5. tcp_err
  6. tcp_readable
  7. tcp_select
  8. tcp_ioctl
  9. tcp_check
  10. tcp_send_check
  11. tcp_send_skb
  12. tcp_dequeue_partial
  13. tcp_send_partial
  14. tcp_enqueue_partial
  15. tcp_send_ack
  16. tcp_build_header
  17. tcp_write
  18. tcp_sendto
  19. tcp_read_wakeup
  20. cleanup_rbuf
  21. tcp_read_urg
  22. tcp_read
  23. tcp_shutdown
  24. tcp_recvfrom
  25. tcp_reset
  26. tcp_options
  27. default_mask
  28. tcp_conn_request
  29. tcp_close
  30. tcp_write_xmit
  31. sort_send
  32. tcp_ack
  33. tcp_data
  34. tcp_check_urg
  35. tcp_urg
  36. tcp_fin
  37. tcp_accept
  38. tcp_connect
  39. tcp_sequence
  40. tcp_clean_end
  41. tcp_rcv
  42. tcp_write_wakeup
  43. tcp_send_probe0
  44. tcp_setsockopt
  45. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *
  83  *
  84  * To Fix:
  85  *                      Possibly a problem with accept(). BSD accept never fails after
  86  *              it causes a select. Linux can - given the official select semantics I
  87  *              feel that _really_ its the BSD network programs that are bust (notably
  88  *              inetd, which hangs occasionally because of this).
  89  *
  90  *                      Fast path the code. Two things here - fix the window calculation
  91  *              so it doesn't iterate over the queue, also spot packets with no funny
  92  *              options arriving in order and process directly.
  93  *
  94  *              This program is free software; you can redistribute it and/or
  95  *              modify it under the terms of the GNU General Public License
  96  *              as published by the Free Software Foundation; either version
  97  *              2 of the License, or(at your option) any later version.
  98  *
  99  * Description of States:
 100  *
 101  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 102  *
 103  *      TCP_SYN_RECV            received a connection request, sent ack,
 104  *                              waiting for final ack in three-way handshake.
 105  *
 106  *      TCP_ESTABLISHED         connection established
 107  *
 108  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 109  *                              transmission of remaining buffered data
 110  *
 111  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 112  *                              to shutdown
 113  *
 114  *      TCP_CLOSING             both sides have shutdown but we still have
 115  *                              data we have to finish sending
 116  *
 117  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 118  *                              closed, can only be entered from FIN_WAIT2
 119  *                              or CLOSING.  Required because the other end
 120  *                              may not have gotten our last ACK causing it
 121  *                              to retransmit the data packet (which we ignore)
 122  *
 123  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 124  *                              us to finish writing our data and to shutdown
 125  *                              (we have to close() to move on to LAST_ACK)
 126  *
 127  *      TCP_LAST_ACK            our side has shutdown after remote has
 128  *                              shutdown.  There may still be data in our
 129  *                              buffer that we have to finish sending
 130  *              
 131  *      TCP_CLOSE               socket is finished
 132  */
 133 #include <linux/types.h>
 134 #include <linux/sched.h>
 135 #include <linux/mm.h>
 136 #include <linux/string.h>
 137 #include <linux/socket.h>
 138 #include <linux/sockios.h>
 139 #include <linux/termios.h>
 140 #include <linux/in.h>
 141 #include <linux/fcntl.h>
 142 #include <linux/inet.h>
 143 #include <linux/netdevice.h>
 144 #include "snmp.h"
 145 #include "ip.h"
 146 #include "protocol.h"
 147 #include "icmp.h"
 148 #include "tcp.h"
 149 #include <linux/skbuff.h>
 150 #include "sock.h"
 151 #include "route.h"
 152 #include <linux/errno.h>
 153 #include <linux/timer.h>
 154 #include <asm/system.h>
 155 #include <asm/segment.h>
 156 #include <linux/mm.h>
 157 
     /* TCP_FASTPATH is forced undefined here, so the #ifdef TCP_FASTPATH section below is compiled out. */
 158 #undef TCP_FASTPATH
 159 
     /* NOTE(review): SEQ_TICK and seq_offset are not referenced in the visible part of this file - confirm their users. */
 160 #define SEQ_TICK 3
 161 unsigned long seq_offset;
     /* TCP statistics counters; tcp_err() bumps TcpAttemptFails and tcp_send_skb() bumps TcpOutSegs. */
 162 struct tcp_mib  tcp_statistics;
 163 
     /* Counters that exist only when TCP_FASTPATH is defined (presumably receive fast-path hit/miss statistics - confirm). */
 164 #ifdef TCP_FASTPATH
 165 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 166 #endif
 167 
 168 
 169 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 170 {
 171         if (a < b) 
 172                 return(a);
 173         return(b);
 174 }
 175 
 176 
 177 /* This routine picks a TCP windows for a socket based on
 178    the following constraints
 179    
 180    1. The window can never be shrunk once it is offered (RFC 793)
 181    2. We limit memory per socket
 182    
 183    For now we use NET2E3's heuristic of offering half the memory
 184    we have handy. All is not as bad as this seems however because
 185    of two things. Firstly we will bin packets even within the window
 186    in order to get the data we are waiting for into the memory limit.
 187    Secondly we bin common duplicate forms at receive time
 188    
 189    Better heuristics welcome
 190 */
 191    
 192 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 193 {
 194         int new_window = sk->prot->rspace(sk);
 195         
 196         if(sk->window_clamp)
 197                 new_window=min(sk->window_clamp,new_window);
 198 /*
 199  * two things are going on here.  First, we don't ever offer a
 200  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 201  * receiver side of SWS as specified in RFC1122.
 202  * Second, we always give them at least the window they
 203  * had before, in order to avoid retracting window.  This
 204  * is technically allowed, but RFC1122 advises against it and
 205  * in practice it causes trouble.
 206  */
 207         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 208                 return(sk->window);
 209         return(new_window);
 210 }
 211 
 212 /*
 213  *      Enter the time wait state. 
 214  */
 215 
 216 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 217 {
 218         sk->state = TCP_TIME_WAIT;
 219         sk->shutdown = SHUTDOWN_MASK;
 220         if (!sk->dead)
 221                 sk->state_change(sk);
 222         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 223 }
 224 
 225 /*
 226  *      A timer event has trigger a tcp retransmit timeout. The
 227  *      socket xmit queue is ready and set up to send. Because
 228  *      the ack receive code keeps the queue straight we do
 229  *      nothing clever here.
 230  */
 231 
 232 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 233 {
 234         if (all) 
 235         {
 236                 ip_retransmit(sk, all);
 237                 return;
 238         }
 239 
 240         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 241         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 242         sk->cong_count = 0;
 243 
 244         sk->cong_window = 1;
 245 
 246         /* Do the actual retransmit. */
 247         ip_retransmit(sk, all);
 248 }
 249 
 250 
 251 /*
 252  * This routine is called by the ICMP module when it gets some
 253  * sort of error condition.  If err < 0 then the socket should
 254  * be closed and the error returned to the user.  If err > 0
 255  * it's just the icmp type << 8 | icmp code.  After adjustment
 256  * header points to the first 8 bytes of the tcp header.  We need
 257  * to find the appropriate port.
 258  */
 259 
 260 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 261         unsigned long saddr, struct inet_protocol *protocol)
 262 {
 263         struct tcphdr *th;
 264         struct sock *sk;
 265         struct iphdr *iph=(struct iphdr *)header;
 266   
 267         header+=4*iph->ihl;
 268    
 269 
 270         th =(struct tcphdr *)header;
 271         sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 272 
 273         if (sk == NULL) 
 274                 return;
 275   
 276         if(err<0)
 277         {
 278                 sk->err = -err;
 279                 sk->error_report(sk);
 280                 return;
 281         }
 282 
 283         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 284         {
 285                 /*
 286                  * FIXME:
 287                  * For now we will just trigger a linear backoff.
 288                  * The slow start code should cause a real backoff here.
 289                  */
 290                 if (sk->cong_window > 4)
 291                         sk->cong_window--;
 292                 return;
 293         }
 294 
 295 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 296 
 297         /*
 298          * If we've already connected we will keep trying
 299          * until we time out, or the user gives up.
 300          */
 301 
 302         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 303         {
 304                 if (sk->state == TCP_SYN_SENT) 
 305                 {
 306                         tcp_statistics.TcpAttemptFails++;
 307                         sk->state = TCP_CLOSE;
 308                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 309                 }
 310                 sk->err = icmp_err_convert[err & 0xff].errno;           
 311         }
 312         return;
 313 }
 314 
 315 
 316 /*
 317  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 318  *      in the received data queue (ie a frame missing that needs sending to us)
 319  */
 320 
 321 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 322 {
 323         unsigned long counted;
 324         unsigned long amount;
 325         struct sk_buff *skb;
 326         int sum;
 327         unsigned long flags;
 328 
 329         if(sk && sk->debug)
 330                 printk("tcp_readable: %p - ",sk);
 331 
 332         save_flags(flags);
 333         cli();
 334         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 335         {
 336                 restore_flags(flags);
 337                 if(sk && sk->debug) 
 338                         printk("empty\n");
 339                 return(0);
 340         }
 341   
 342         counted = sk->copied_seq+1;     /* Where we are at the moment */
 343         amount = 0;
 344   
 345         /* Do until a push or until we are out of data. */
 346         do 
 347         {
 348                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 349                         break;
 350                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 351                 if (skb->h.th->syn)
 352                         sum++;
 353                 if (sum >= 0) 
 354                 {                                       /* Add it up, move on */
 355                         amount += sum;
 356                         if (skb->h.th->syn) 
 357                                 amount--;
 358                         counted += sum;
 359                 }
 360                 if (amount && skb->h.th->psh) break;
 361                 skb = skb->next;
 362         }
 363         while(skb != (struct sk_buff *)&sk->receive_queue);
 364 
 365         if (amount && !sk->urginline && sk->urg_data &&
 366             (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 367                 amount--;               /* don't count urg data */
 368         restore_flags(flags);
 369         if(sk->debug)
 370                 printk("got %lu bytes.\n",amount);
 371         return(amount);
 372 }
 373 
 374 
 375 /*
 376  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 377  *      listening socket has a receive queue of sockets to accept.
 378  */
 379 
 380 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 381 {
 382         sk->inuse = 1;
 383 
 384         switch(sel_type) 
 385         {
 386                 case SEL_IN:
 387                         if(sk->debug)
 388                                 printk("select in");
 389                         select_wait(sk->sleep, wait);
 390                         if(sk->debug)
 391                                 printk("-select out");
 392                         if (skb_peek(&sk->receive_queue) != NULL) 
 393                         {
 394                                 if (sk->state == TCP_LISTEN || tcp_readable(sk)) 
 395                                 {
 396                                         release_sock(sk);
 397                                         if(sk->debug)
 398                                                 printk("-select ok data\n");
 399                                         return(1);
 400                                 }
 401                         }
 402                         if (sk->err != 0)       /* Receiver error */
 403                         {
 404                                 release_sock(sk);
 405                                 if(sk->debug)
 406                                         printk("-select ok error");
 407                                 return(1);
 408                         }
 409                         if (sk->shutdown & RCV_SHUTDOWN) 
 410                         {
 411                                 release_sock(sk);
 412                                 if(sk->debug)
 413                                         printk("-select ok down\n");
 414                                 return(1);
 415                         } 
 416                         else 
 417                         {
 418                                 release_sock(sk);
 419                                 if(sk->debug)
 420                                         printk("-select fail\n");
 421                                 return(0);
 422                         }
 423                 case SEL_OUT:
 424                         select_wait(sk->sleep, wait);
 425                         if (sk->shutdown & SEND_SHUTDOWN) 
 426                         {
 427                                 /* FIXME: should this return an error? */
 428                                 release_sock(sk);
 429                                 return(0);
 430                         }
 431 
 432                         /*
 433                          * FIXME:
 434                          * Hack so it will probably be able to write
 435                          * something if it says it's ok to write.
 436                          */
 437                         
 438                         if (sk->prot->wspace(sk) >= sk->mss) 
 439                         {
 440                                 release_sock(sk);
 441                                 /* This should cause connect to work ok. */
 442                                 if (sk->state == TCP_SYN_RECV ||
 443                                     sk->state == TCP_SYN_SENT) return(0);
 444                                 return(1);
 445                         }
 446                         release_sock(sk);
 447                         return(0);
 448                 case SEL_EX:
 449                         select_wait(sk->sleep,wait);
 450                         if (sk->err || sk->urg_data) 
 451                         {
 452                                 release_sock(sk);
 453                                 return(1);
 454                         }
 455                         release_sock(sk);
 456                         return(0);
 457         }
 458 
 459         release_sock(sk);
 460         return(0);
 461 }
 462 
 463 
 464 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 465 {
 466         int err;
 467         switch(cmd) 
 468         {
 469 
 470                 case TIOCINQ:
 471 #ifdef FIXME    /* FIXME: */
 472                 case FIONREAD:
 473 #endif
 474                 {
 475                         unsigned long amount;
 476 
 477                         if (sk->state == TCP_LISTEN) 
 478                                 return(-EINVAL);
 479 
 480                         sk->inuse = 1;
 481                         amount = tcp_readable(sk);
 482                         release_sock(sk);
 483                         err=verify_area(VERIFY_WRITE,(void *)arg,
 484                                                    sizeof(unsigned long));
 485                         if(err)
 486                                 return err;
 487                         put_fs_long(amount,(unsigned long *)arg);
 488                         return(0);
 489                 }
 490                 case SIOCATMARK:
 491                 {
 492                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 493 
 494                         err = verify_area(VERIFY_WRITE,(void *) arg,
 495                                                   sizeof(unsigned long));
 496                         if (err)
 497                                 return err;
 498                         put_fs_long(answ,(int *) arg);
 499                         return(0);
 500                 }
 501                 case TIOCOUTQ:
 502                 {
 503                         unsigned long amount;
 504 
 505                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 506                         amount = sk->prot->wspace(sk);
 507                         err=verify_area(VERIFY_WRITE,(void *)arg,
 508                                                    sizeof(unsigned long));
 509                         if(err)
 510                                 return err;
 511                         put_fs_long(amount,(unsigned long *)arg);
 512                         return(0);
 513                 }
 514                 default:
 515                         return(-EINVAL);
 516         }
 517 }
 518 
 519 
 520 /*
 521  *      This routine computes a TCP checksum. 
 522  */
 523  
     /*
      * tcp_check - compute the TCP checksum of a segment using i386
      * inline assembly.
      *
      *   th    - start of the data to sum (the TCP header).
      *   len   - number of bytes to sum starting at th.
      *   saddr - source address; 0 is replaced by ip_my_addr().
      *   daddr - destination address.
      *
      * The sum covers daddr, saddr and a word built from the length and
      * IPPROTO_TCP, followed by the len bytes at th.  Carries are added
      * back in, the upper 16 bits are folded into the lower 16, and the
      * complement of the low 16 bits is returned.
      */
 524 unsigned short tcp_check(struct tcphdr *th, int len,
 525           unsigned long saddr, unsigned long daddr)
 526 {     
 527         unsigned long sum;
 528    
 529         if (saddr == 0) saddr = ip_my_addr();
 530 
 531 /*
 532  * stupid, gcc complains when I use just one __asm__ block,
 533  * something about too many reloads, but this is just two
 534  * instructions longer than what I want
 535  */
     /* First block: sum = daddr + saddr + ((ntohs(len) << 16) + IPPROTO_TCP*256), adding each carry back in. */
 536         __asm__("
 537             addl %%ecx, %%ebx
 538             adcl %%edx, %%ebx
 539             adcl $0, %%ebx
 540             "
 541         : "=b"(sum)
 542         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 543         : "bx", "cx", "dx" );
     /*
      * Second block: add the len bytes at th to sum.
      *   label 1: when len >= 32, len>>5 iterations of eight 32-bit loads each;
      *   label 2/3: (len & 28) >> 2 further 32-bit loads;
      *   label 4: one 16-bit load when bit 1 of len is set;
      *   label 5: one 8-bit load when bit 0 of len is set;
      *   label 6: fold the upper 16 bits of the sum into the lower 16.
      * edx keeps a copy of len because ecx is consumed as the loop counter.
      */
 544         __asm__("
 545             movl %%ecx, %%edx
 546             cld
 547             cmpl $32, %%ecx
 548             jb 2f
 549             shrl $5, %%ecx
 550             clc
 551 1:          lodsl
 552             adcl %%eax, %%ebx
 553             lodsl
 554             adcl %%eax, %%ebx
 555             lodsl
 556             adcl %%eax, %%ebx
 557             lodsl
 558             adcl %%eax, %%ebx
 559             lodsl
 560             adcl %%eax, %%ebx
 561             lodsl
 562             adcl %%eax, %%ebx
 563             lodsl
 564             adcl %%eax, %%ebx
 565             lodsl
 566             adcl %%eax, %%ebx
 567             loop 1b
 568             adcl $0, %%ebx
 569             movl %%edx, %%ecx
 570 2:          andl $28, %%ecx
 571             je 4f
 572             shrl $2, %%ecx
 573             clc
 574 3:          lodsl
 575             adcl %%eax, %%ebx
 576             loop 3b
 577             adcl $0, %%ebx
 578 4:          movl $0, %%eax
 579             testw $2, %%dx
 580             je 5f
 581             lodsw
 582             addl %%eax, %%ebx
 583             adcl $0, %%ebx
 584             movw $0, %%ax
 585 5:          test $1, %%edx
 586             je 6f
 587             lodsb
 588             addl %%eax, %%ebx
 589             adcl $0, %%ebx
 590 6:          movl %%ebx, %%eax
 591             shrl $16, %%eax
 592             addw %%ax, %%bx
 593             adcw $0, %%bx
 594             "
 595         : "=b"(sum)
 596         : "0"(sum), "c"(len), "S"(th)
 597         : "ax", "bx", "cx", "dx", "si" );
 598 
 599         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 600   
     /* Complement the folded sum and mask off the stale upper half. */
 601         return((~sum) & 0xffff);
 602 }
 603 
 604 
 605 
 606 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 607                 unsigned long daddr, int len, struct sock *sk)
 608 {
 609         th->check = 0;
 610         th->check = tcp_check(th, len, saddr, daddr);
 611         return;
 612 }
 613 
 614 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 615 {
 616         int size;
 617         struct tcphdr * th = skb->h.th;
 618 
 619         /* length of packet (not counting length of pre-tcp headers) */
 620         size = skb->len - ((unsigned char *) th - skb->data);
 621 
 622         /* sanity check it.. */
 623         if (size < sizeof(struct tcphdr) || size > skb->len) 
 624         {
 625                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 626                         skb, skb->data, th, skb->len);
 627                 kfree_skb(skb, FREE_WRITE);
 628                 return;
 629         }
 630 
 631         /* If we have queued a header size packet.. */
 632         if (size == sizeof(struct tcphdr)) 
 633         {
 634                 /* If its got a syn or fin its notionally included in the size..*/
 635                 if(!th->syn && !th->fin) 
 636                 {
 637                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 638                         kfree_skb(skb,FREE_WRITE);
 639                         return;
 640                 }
 641         }
 642 
 643         tcp_statistics.TcpOutSegs++;  
 644 
 645         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 646         if (after(skb->h.seq, sk->window_seq) ||
 647             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 648              sk->packets_out >= sk->cong_window) 
 649         {
 650                 /* checksum will be supplied by tcp_write_xmit.  So
 651                  * we shouldn't need to set it at all.  I'm being paranoid */
 652                 th->check = 0;
 653                 if (skb->next != NULL) 
 654                 {
 655                         printk("tcp_send_partial: next != NULL\n");
 656                         skb_unlink(skb);
 657                 }
 658                 skb_queue_tail(&sk->write_queue, skb);
 659                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 660                     sk->send_head == NULL &&
 661                     sk->ack_backlog == 0)
 662                         reset_timer(sk, TIME_PROBE0, sk->rto);
 663         } 
 664         else 
 665         {
 666                 th->ack_seq = ntohl(sk->acked_seq);
 667                 th->window = ntohs(tcp_select_window(sk));
 668 
 669                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 670 
 671                 sk->sent_seq = sk->write_seq;
 672                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 673         }
 674 }
 675 
 676 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 677 {
 678         struct sk_buff * skb;
 679         unsigned long flags;
 680 
 681         save_flags(flags);
 682         cli();
 683         skb = sk->partial;
 684         if (skb) {
 685                 sk->partial = NULL;
 686                 del_timer(&sk->partial_timer);
 687         }
 688         restore_flags(flags);
 689         return skb;
 690 }
 691 
 692 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 693 {
 694         struct sk_buff *skb;
 695 
 696         if (sk == NULL)
 697                 return;
 698         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 699                 tcp_send_skb(sk, skb);
 700 }
 701 
 702 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 703 {
 704         struct sk_buff * tmp;
 705         unsigned long flags;
 706 
 707         save_flags(flags);
 708         cli();
 709         tmp = sk->partial;
 710         if (tmp)
 711                 del_timer(&sk->partial_timer);
 712         sk->partial = skb;
 713         init_timer(&sk->partial_timer);
 714         sk->partial_timer.expires = HZ;
 715         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 716         sk->partial_timer.data = (unsigned long) sk;
 717         add_timer(&sk->partial_timer);
 718         restore_flags(flags);
 719         if (tmp)
 720                 tcp_send_skb(sk, tmp);
 721 }
 722 
 723 
 724 /*
 725  *      This routine sends an ack and also updates the window. 
 726  */
 727  
 728 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 729              struct sock *sk,
 730              struct tcphdr *th, unsigned long daddr)
 731 {
 732         struct sk_buff *buff;
 733         struct tcphdr *t1;
 734         struct device *dev = NULL;
 735         int tmp;
 736 
 737         if(sk->zapped)
 738                 return;         /* We have been reset, we may not send again */
 739         /*
 740          * We need to grab some memory, and put together an ack,
 741          * and then put it into the queue to be sent.
 742          */
 743 
 744         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 745         if (buff == NULL) 
 746         {
 747                 /* Force it to send an ack. */
 748                 sk->ack_backlog++;
 749                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 750                 {
 751                         reset_timer(sk, TIME_WRITE, 10);
 752                 }
 753                 return;
 754         }
 755 
 756         buff->len = sizeof(struct tcphdr);
 757         buff->sk = sk;
 758         buff->localroute = sk->localroute;
 759         t1 =(struct tcphdr *) buff->data;
 760 
 761         /* Put in the IP header and routing stuff. */
 762         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 763                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 764         if (tmp < 0) 
 765         {
 766                 buff->free=1;
 767                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 768                 return;
 769         }
 770         buff->len += tmp;
 771         t1 =(struct tcphdr *)((char *)t1 +tmp);
 772 
 773         /* FIXME: */
 774         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 775 
 776         /*
 777          *      Swap the send and the receive. 
 778          */
 779          
 780         t1->dest = th->source;
 781         t1->source = th->dest;
 782         t1->seq = ntohl(sequence);
 783         t1->ack = 1;
 784         sk->window = tcp_select_window(sk);
 785         t1->window = ntohs(sk->window);
 786         t1->res1 = 0;
 787         t1->res2 = 0;
 788         t1->rst = 0;
 789         t1->urg = 0;
 790         t1->syn = 0;
 791         t1->psh = 0;
 792         t1->fin = 0;
 793         if (ack == sk->acked_seq) 
 794         {
 795                 sk->ack_backlog = 0;
 796                 sk->bytes_rcv = 0;
 797                 sk->ack_timed = 0;
 798                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 799                                   && sk->timeout == TIME_WRITE) 
 800                 {
 801                         if(sk->keepopen) {
 802                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 803                         } else {
 804                                 delete_timer(sk);
 805                         }
 806                 }
 807         }
 808         t1->ack_seq = ntohl(ack);
 809         t1->doff = sizeof(*t1)/4;
 810         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 811         if (sk->debug)
 812                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 813         tcp_statistics.TcpOutSegs++;
 814         sk->prot->queue_xmit(sk, dev, buff, 1);
 815 }
 816 
 817 
 818 /* 
 819  *      This routine builds a generic TCP header. 
 820  */
 821  
 822 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 823 {
 824 
 825         /* FIXME: want to get rid of this. */
 826         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 827         th->seq = htonl(sk->write_seq);
 828         th->psh =(push == 0) ? 1 : 0;
 829         th->doff = sizeof(*th)/4;
 830         th->ack = 1;
 831         th->fin = 0;
 832         sk->ack_backlog = 0;
 833         sk->bytes_rcv = 0;
 834         sk->ack_timed = 0;
 835         th->ack_seq = htonl(sk->acked_seq);
 836         sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 837         th->window = htons(sk->window);
 838 
 839         return(sizeof(*th));
 840 }
 841 
 842 /*
 843  *      This routine copies from a user buffer into a socket,
 844  *      and starts the transmit system.
      *
      *      sk        socket to write to; marked in use on entry and
      *                released (release_sock) on every exit path
      *      from      user-space source buffer (read with memcpy_fromfs)
      *      len       number of bytes the caller wants to send
      *      nonblock  when non-zero, return instead of sleeping
      *      flags     MSG_OOB and MSG_DONTROUTE are examined here
      *
      *      Returns the number of bytes copied so far when that is non-zero.
      *      Otherwise returns a negative error: -sk->err, -EPIPE, -EAGAIN,
      *      -ERESTARTSYS, or the negative value from prot->build_header().
      *
      *      Data that does not fill a whole segment is kept as the socket's
      *      "partial" packet (tcp_enqueue_partial) while packets are still
      *      outstanding, and is topped up by later passes of the loop.
 845  */
 846 
 847 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 848           int len, int nonblock, unsigned flags)
 849 {
 850         int copied = 0;
 851         int copy;
 852         int tmp;
 853         struct sk_buff *skb;
 854         struct sk_buff *send_tmp;
 855         unsigned char *buff;
 856         struct proto *prot;
 857         struct device *dev = NULL;
 858 
 859         sk->inuse=1;
 860         prot = sk->prot;
 861         while(len > 0) 
 862         {
 863                 if (sk->err) 
 864                 {                       /* Stop on an error */
 865                         release_sock(sk);
                             /* sk->err is left set when data was already copied. */
 866                         if (copied) 
 867                                 return(copied);
 868                         tmp = -sk->err;
 869                         sk->err = 0;
 870                         return(tmp);
 871                 }
 872 
 873         /*
 874          *      First thing we do is make sure that we are established. 
 875          */
 876         
 877                 if (sk->shutdown & SEND_SHUTDOWN) 
 878                 {
 879                         release_sock(sk);
                             /*
                              * On a short write EPIPE stays in sk->err;
                              * otherwise it is cleared and returned directly.
                              */
 880                         sk->err = EPIPE;
 881                         if (copied) 
 882                                 return(copied);
 883                         sk->err = 0;
 884                         return(-EPIPE);
 885                 }
 886 
 887 
 888         /* 
 889          *      Wait for a connection to finish.
 890          */
 891         
 892                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 893                 {
 894                         if (sk->err) 
 895                         {
 896                                 release_sock(sk);
 897                                 if (copied) 
 898                                         return(copied);
 899                                 tmp = -sk->err;
 900                                 sk->err = 0;
 901                                 return(tmp);
 902                         }
 903 
                             /* Any state other than SYN_SENT/SYN_RECV cannot become writable: fail. */
 904                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 905                         {
 906                                 release_sock(sk);
 907                                 if (copied) 
 908                                         return(copied);
 909 
 910                                 if (sk->err) 
 911                                 {
 912                                         tmp = -sk->err;
 913                                         sk->err = 0;
 914                                         return(tmp);
 915                                 }
 916 
 917                                 if (sk->keepopen) 
 918                                 {
 919                                         send_sig(SIGPIPE, current, 0);
 920                                 }
 921                                 return(-EPIPE);
 922                         }
 923 
 924                         if (nonblock || copied) 
 925                         {
 926                                 release_sock(sk);
 927                                 if (copied) 
 928                                         return(copied);
 929                                 return(-EAGAIN);
 930                         }
 931 
 932                         release_sock(sk);
 933                         cli();
 934                 
                             /* Re-test with interrupts off before sleeping. */
 935                         if (sk->state != TCP_ESTABLISHED &&
 936                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 937                         {
 938                                 interruptible_sleep_on(sk->sleep);
 939                                 if (current->signal & ~current->blocked) 
 940                                 {
 941                                         sti();
 942                                         if (copied) 
 943                                                 return(copied);
 944                                         return(-ERESTARTSYS);
 945                                 }
 946                         }
 947                         sk->inuse = 1;
 948                         sti();
 949                 }
 950 
 951         /*
 952          * The following code can result in copy <= 0 if sk->mss is ever
 953          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 954          * sk->mtu is constant once SYN processing is finished.  I.e. we
 955          * had better not get here until we've seen his SYN and at least one
 956          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 957          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 958          * non-decreasing.  Note that any ioctl to set user_mss must be done
 959          * before the exchange of SYN's.  If the initial ack from the other
 960          * end has a window of 0, max_window and thus mss will both be 0.
 961          */
 962 
 963         /* 
 964          *      Now we need to check if we have a half built packet. 
 965          */
 966 
 967                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
 968                 {
 969                         int hdrlen;
 970 
 971                          /* IP header + TCP header */
 972                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 973                                  + sizeof(struct tcphdr);
 974         
 975                         /* Add more stuff to the end of skb->len */
 976                         if (!(flags & MSG_OOB)) 
 977                         {
                                     /*
                                      * NOTE(review): if min() takes unsigned arguments, a
                                      * negative "room left" value would wrap and never reach
                                      * the copy <= 0 check below - confirm min()'s signature.
                                      */
 978                                 copy = min(sk->mss - (skb->len - hdrlen), len);
 979                                 /* FIXME: this is really a bug. */
 980                                 if (copy <= 0) 
 981                                 {
 982                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
 983                                         copy = 0;
 984                                 }
 985           
 986                                 memcpy_fromfs(skb->data + skb->len, from, copy);
 987                                 skb->len += copy;
 988                                 from += copy;
 989                                 copied += copy;
 990                                 len -= copy;
 991                                 sk->write_seq += copy;
 992                         }
                             /*
                              * Send now if the segment is full, if this is an OOB
                              * write, or if nothing is in flight; otherwise keep it
                              * as the partial packet.
                              */
 993                         if ((skb->len - hdrlen) >= sk->mss ||
 994                                 (flags & MSG_OOB) || !sk->packets_out)
 995                                 tcp_send_skb(sk, skb);
 996                         else
 997                                 tcp_enqueue_partial(skb, sk);
 998                         continue;
 999                 }
1000 
1001         /*
1002          * We also need to worry about the window.
1003          * If window < 1/2 the maximum window we've seen from this
1004          *   host, don't use it.  This is sender side
1005          *   silly window prevention, as specified in RFC1122.
1006          *   (Note that this is different than earlier versions of
1007          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1008          *   use the whole MSS.  Since this results in the right
1009          *   edge of the packet being outside the window, it will
1010          *   be queued for later rather than sent.
1011          */
1012 
1013                 copy = sk->window_seq - sk->write_seq;
1014                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1015                         copy = sk->mss;
1016                 if (copy > len)
1017                         copy = len;
1018 
1019         /*
1020          *      We should really check the window here also. 
1021          */
1022          
                     /*
                      * send_tmp stays NULL for a full-sized or OOB segment; a
                      * non-NULL send_tmp marks a short segment that may be held
                      * back as the partial packet further down.
                      */
1023                 send_tmp = NULL;
1024                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1025                 {
1026                         /*
1027                          *      We will release the socket in case we sleep here. 
1028                          */
1029                         release_sock(sk);
1030                         /*
1031                          *      NB: following must be mtu, because mss can be increased.
1032                          *      mss is always <= mtu 
1033                          */
1034                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1035                         sk->inuse = 1;
1036                         send_tmp = skb;
1037                 } 
1038                 else 
1039                 {
1040                         /*
1041                          *      We will release the socket in case we sleep here. 
1042                          */
1043                         release_sock(sk);
1044                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1045                         sk->inuse = 1;
1046                 }
1047 
1048                 /*
1049                  *      If we didn't get any memory, we need to sleep. 
1050                  */
1051 
1052                 if (skb == NULL) 
1053                 {
1054                         if (nonblock /* || copied */) 
1055                         {
1056                                 release_sock(sk);
1057                                 if (copied) 
1058                                         return(copied);
1059                                 return(-EAGAIN);
1060                         }
1061 
1062                         /*
1063                          *      FIXME: here is another race condition. 
1064                          */
1065 
1066                         tmp = sk->wmem_alloc;
1067                         release_sock(sk);
1068                         cli();
1069                         /*
1070                          *      Again we will try to avoid it. 
                              *      Only sleep if wmem_alloc has not dropped since
                              *      it was sampled above.
1071                          */
1072                         if (tmp <= sk->wmem_alloc &&
1073                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1074                                 && sk->err == 0) 
1075                         {
1076                                 interruptible_sleep_on(sk->sleep);
1077                                 if (current->signal & ~current->blocked) 
1078                                 {
1079                                         sti();
1080                                         if (copied) 
1081                                                 return(copied);
1082                                         return(-ERESTARTSYS);
1083                                 }
1084                         }
1085                         sk->inuse = 1;
1086                         sti();
1087                         continue;
1088                 }
1089 
1090                 skb->len = 0;
1091                 skb->sk = sk;
1092                 skb->free = 0;
1093                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1094         
1095                 buff = skb->data;
1096         
1097                 /*
1098                  * FIXME: we need to optimize this.
1099                  * Perhaps some hints here would be good.
1100                  */
1101                 
1102                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1103                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1104                 if (tmp < 0 ) 
1105                 {
1106                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1107                         release_sock(sk);
1108                         if (copied) 
1109                                 return(copied);
1110                         return(tmp);
1111                 }
1112                 skb->len += tmp;
1113                 skb->dev = dev;
1114                 buff += tmp;
1115                 skb->h.th =(struct tcphdr *) buff;
                     /* Third argument is the data still left after this segment; PSH is set when it is 0. */
1116                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
                     /*
                      * tcp_build_header() as defined in this file always returns
                      * sizeof(struct tcphdr), so this branch is purely defensive.
                      */
1117                 if (tmp < 0) 
1118                 {
1119                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1120                         release_sock(sk);
1121                         if (copied) 
1122                                 return(copied);
1123                         return(tmp);
1124                 }
1125 
1126                 if (flags & MSG_OOB) 
1127                 {
1128                         ((struct tcphdr *)buff)->urg = 1;
1129                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1130                 }
1131                 skb->len += tmp;
1132                 memcpy_fromfs(buff+tmp, from, copy);
1133 
1134                 from += copy;
1135                 copied += copy;
1136                 len -= copy;
1137                 skb->len += copy;
1138                 skb->free = 0;
1139                 sk->write_seq += copy;
1140         
                     /* A short segment is held back while packets are still outstanding. */
1141                 if (send_tmp != NULL && sk->packets_out) 
1142                 {
1143                         tcp_enqueue_partial(send_tmp, sk);
1144                         continue;
1145                 }
1146                 tcp_send_skb(sk, skb);
1147         }
1148         sk->err = 0;
1149 
1150 /*
1151  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1152  *      interactive fast network servers. It's meant to be on and
1153  *      it really improves the throughput though not the echo time
1154  *      on my slow slip link - Alan
1155  */
1156 
1157 /*
1158  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1159  */
1160  
1161         if(sk->partial && ((!sk->packets_out) 
1162      /* If not nagling we can send on the before case too.. */
1163               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1164         ))
1165                 tcp_send_partial(sk);
1166 
1167         release_sock(sk);
1168         return(copied);
1169 }
1170 
1171 
1172 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1173            int len, int nonblock, unsigned flags,
1174            struct sockaddr_in *addr, int addr_len)
1175 {
1176         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1177                 return -EINVAL;
1178         if (addr_len < sizeof(*addr)) 
1179                 return(-EINVAL);
1180         if (addr->sin_family && addr->sin_family != AF_INET) 
1181                 return(-EINVAL);
1182         if (addr->sin_port != sk->dummy_th.dest) 
1183                 return(-EISCONN);
1184         if (addr->sin_addr.s_addr != sk->daddr) 
1185                 return(-EISCONN);
1186         return(tcp_write(sk, from, len, nonblock, flags));
1187 }
1188 
1189 
1190 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1191 {
1192         int tmp;
1193         struct device *dev = NULL;
1194         struct tcphdr *t1;
1195         struct sk_buff *buff;
1196 
1197         if (!sk->ack_backlog) 
1198                 return;
1199 
1200         /*
1201          * FIXME: we need to put code here to prevent this routine from
1202          * being called.  Being called once in a while is ok, so only check
1203          * if this is the second time in a row.
1204          */
1205 
1206         /*
1207          * We need to grab some memory, and put together an ack,
1208          * and then put it into the queue to be sent.
1209          */
1210 
1211         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1212         if (buff == NULL) 
1213         {
1214                 /* Try again real soon. */
1215                 reset_timer(sk, TIME_WRITE, 10);
1216                 return;
1217         }
1218 
1219         buff->len = sizeof(struct tcphdr);
1220         buff->sk = sk;
1221         buff->localroute = sk->localroute;
1222         
1223         /*
1224          *      Put in the IP header and routing stuff. 
1225          */
1226 
1227         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1228                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1229         if (tmp < 0) 
1230         {
1231                 buff->free=1;
1232                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1233                 return;
1234         }
1235 
1236         buff->len += tmp;
1237         t1 =(struct tcphdr *)(buff->data +tmp);
1238 
1239         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1240         t1->seq = htonl(sk->sent_seq);
1241         t1->ack = 1;
1242         t1->res1 = 0;
1243         t1->res2 = 0;
1244         t1->rst = 0;
1245         t1->urg = 0;
1246         t1->syn = 0;
1247         t1->psh = 0;
1248         sk->ack_backlog = 0;
1249         sk->bytes_rcv = 0;
1250         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1251         t1->window = ntohs(sk->window);
1252         t1->ack_seq = ntohl(sk->acked_seq);
1253         t1->doff = sizeof(*t1)/4;
1254         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1255         sk->prot->queue_xmit(sk, dev, buff, 1);
1256         tcp_statistics.TcpOutSegs++;
1257 }
1258 
1259 
1260 /*
1261  *      FIXME:
1262  *      This routine frees used buffers.
1263  *      It should consider sending an ACK to let the
1264  *      other end know we now have a bigger window.
1265  */
1266 
1267 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1268 {
1269         unsigned long flags;
1270         unsigned long left;
1271         struct sk_buff *skb;
1272         unsigned long rspace;
1273 
1274         if(sk->debug)
1275                 printk("cleaning rbuf for sk=%p\n", sk);
1276   
1277         save_flags(flags);
1278         cli();
1279   
1280         left = sk->prot->rspace(sk);
1281  
1282         /*
1283          * We have to loop through all the buffer headers,
1284          * and try to free up all the space we can.
1285          */
1286 
1287         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1288         {
1289                 if (!skb->used) 
1290                         break;
1291                 skb_unlink(skb);
1292                 skb->sk = sk;
1293                 kfree_skb(skb, FREE_READ);
1294         }
1295 
1296         restore_flags(flags);
1297 
1298         /*
1299          * FIXME:
1300          * At this point we should send an ack if the difference
1301          * in the window, and the amount of space is bigger than
1302          * TCP_WINDOW_DIFF.
1303          */
1304 
1305         if(sk->debug)
1306                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1307                                             left);
1308         if ((rspace=sk->prot->rspace(sk)) != left) 
1309         {
1310                 /*
1311                  * This area has caused the most trouble.  The current strategy
1312                  * is to simply do nothing if the other end has room to send at
1313                  * least 3 full packets, because the ack from those will auto-
1314                  * matically update the window.  If the other end doesn't think
1315                  * we have much space left, but we have room for at least 1 more
1316                  * complete packet than it thinks we do, we will send an ack
1317                  * immediately.  Otherwise we will wait up to .5 seconds in case
1318                  * the user reads some more.
1319                  */
1320                 sk->ack_backlog++;
1321         /*
1322          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1323          * if the other end is offering a window smaller than the agreed on MSS
1324          * (called sk->mtu here).  In theory there's no connection between send
1325          * and receive, and so no reason to think that they're going to send
1326          * small packets.  For the moment I'm using the hack of reducing the mss
1327          * only on the send side, so I'm putting mtu here.
1328          */
1329 
1330                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1331                 {
1332                         /* Send an ack right now. */
1333                         tcp_read_wakeup(sk);
1334                 } 
1335                 else 
1336                 {
1337                         /* Force it to send an ack soon. */
1338                         int was_active = del_timer(&sk->timer);
1339                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1340                         {
1341                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1342                         } 
1343                         else
1344                                 add_timer(&sk->timer);
1345                 }
1346         }
1347 } 
1348 
1349 
1350 /*
1351  *      Handle reading urgent data. 
1352  */
1353  
1354 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1355              unsigned char *to, int len, unsigned flags)
1356 {
1357         struct wait_queue wait = { current, NULL };
1358 
1359         while (len > 0) 
1360         {
1361                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1362                         return -EINVAL;
1363                 if (sk->urg_data & URG_VALID) 
1364                 {
1365                         char c = sk->urg_data;
1366                         if (!(flags & MSG_PEEK))
1367                                 sk->urg_data = URG_READ;
1368                         put_fs_byte(c, to);
1369                         return 1;
1370                 }
1371 
1372                 if (sk->err) 
1373                 {
1374                         int tmp = -sk->err;
1375                         sk->err = 0;
1376                         return tmp;
1377                 }
1378 
1379                 if (sk->state == TCP_CLOSE || sk->done) 
1380                 {
1381                         if (!sk->done) {
1382                                 sk->done = 1;
1383                                 return 0;
1384                         }
1385                         return -ENOTCONN;
1386                 }
1387 
1388                 if (sk->shutdown & RCV_SHUTDOWN) 
1389                 {
1390                         sk->done = 1;
1391                         return 0;
1392                 }
1393 
1394                 if (nonblock)
1395                         return -EAGAIN;
1396 
1397                 if (current->signal & ~current->blocked)
1398                         return -ERESTARTSYS;
1399 
1400                 current->state = TASK_INTERRUPTIBLE;
1401                 add_wait_queue(sk->sleep, &wait);
1402                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1403                     !(sk->shutdown & RCV_SHUTDOWN))
1404                         schedule();
1405                 remove_wait_queue(sk->sleep, &wait);
1406                 current->state = TASK_RUNNING;
1407         }
1408         return 0;
1409 }
1410 
1411 
1412 /*
1413  *      This routine copies from a sock struct into the user buffer. 
      *
      *      sk        socket to read from
      *      to        user-space destination (written with memcpy_tofs)
      *      len       maximum number of bytes to copy
      *      nonblock  when non-zero, return -EAGAIN instead of sleeping
      *      flags     MSG_OOB hands the call to tcp_read_urg();
      *                MSG_PEEK reads without advancing sk->copied_seq and
      *                without marking buffers as used
      *
      *      Returns the number of bytes copied, 0 at end of stream, or a
      *      negative error (-ENOTCONN, -sk->err, -EAGAIN, -ERESTARTSYS).
      *      Consumed buffers are released through cleanup_rbuf() before
      *      returning, except on the early LISTEN and MSG_OOB returns.
1414  */
1415  
1416 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1417         int len, int nonblock, unsigned flags)
1418 {
1419         struct wait_queue wait = { current, NULL };
1420         int copied = 0;
1421         unsigned long peek_seq;
1422         unsigned long *seq;
1423         unsigned long used;
1424 
1425         /* This error should be checked. */
1426         if (sk->state == TCP_LISTEN)
1427                 return -ENOTCONN;
1428 
1429         /* Urgent data needs to be handled specially. */
1430         if (flags & MSG_OOB)
1431                 return tcp_read_urg(sk, nonblock, to, len, flags);
1432 
             /*
              * seq points at the read position to advance: the socket's own
              * copied_seq normally, or a private copy when only peeking.
              */
1433         peek_seq = sk->copied_seq;
1434         seq = &sk->copied_seq;
1435         if (flags & MSG_PEEK)
1436                 seq = &peek_seq;
1437 
1438         add_wait_queue(sk->sleep, &wait);
1439         sk->inuse = 1;
1440         while (len > 0) 
1441         {
1442                 struct sk_buff * skb;
1443                 unsigned long offset;
1444         
1445                 /*
1446                  * are we at urgent data? Stop if we have read anything.
1447                  */
1448                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1449                         break;
1450 
                     /* Set before scanning the queue; schedule() below relies on it. */
1451                 current->state = TASK_INTERRUPTIBLE;
1452 
                     /*
                      * Walk the receive queue for the buffer holding byte 1+*seq.
                      * NOTE(review): th->seq is used without byte-order conversion;
                      * presumably the receive path already stored it in host order -
                      * confirm.
                      */
1453                 skb = skb_peek(&sk->receive_queue);
1454                 do 
1455                 {
1456                         if (!skb)
1457                                 break;
                             /* Stop at a buffer that starts after the byte we want. */
1458                         if (before(1+*seq, skb->h.th->seq))
1459                                 break;
1460                         offset = 1 + *seq - skb->h.th->seq;
                             /* The offset is reduced by one for a buffer whose header has SYN set. */
1461                         if (skb->h.th->syn)
1462                                 offset--;
1463                         if (offset < skb->len)
1464                                 goto found_ok_skb;
                             /* Nothing left to read in this buffer: mark it used unless peeking. */
1465                         if (!(flags & MSG_PEEK))
1466                                 skb->used = 1;
1467                         skb = skb->next;
1468                 }
1469                 while (skb != (struct sk_buff *)&sk->receive_queue);
1470 
                     /* No readable data queued.  Return what we have, if anything. */
1471                 if (copied)
1472                         break;
1473 
1474                 if (sk->err) 
1475                 {
1476                         copied = -sk->err;
1477                         sk->err = 0;
1478                         break;
1479                 }
1480 
                     /* On a closed socket the first read returns 0; later reads fail. */
1481                 if (sk->state == TCP_CLOSE) 
1482                 {
1483                         if (!sk->done) 
1484                         {
1485                                 sk->done = 1;
1486                                 break;
1487                         }
1488                         copied = -ENOTCONN;
1489                         break;
1490                 }
1491 
1492                 if (sk->shutdown & RCV_SHUTDOWN) 
1493                 {
1494                         sk->done = 1;
1495                         break;
1496                 }
1497                         
1498                 if (nonblock) 
1499                 {
1500                         copied = -EAGAIN;
1501                         break;
1502                 }
1503 
                     /* Free what has been read and release the socket before sleeping. */
1504                 cleanup_rbuf(sk);
1505                 release_sock(sk);
1506                 schedule();
1507                 sk->inuse = 1;
1508 
1509                 if (current->signal & ~current->blocked) 
1510                 {
1511                         copied = -ERESTARTSYS;
1512                         break;
1513                 }
1514                 continue;
1515 
1516         found_ok_skb:
1517                 /* Ok so how much can we use ? */
1518                 used = skb->len - offset;
1519                 if (len < used)
1520                         used = len;
1521                 /* do we have urgent data here? */
1522                 if (sk->urg_data) 
1523                 {
1524                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1525                         if (urg_offset < used) 
1526                         {
                                     /*
                                      * The urgent byte lies within this copy.  If it is the
                                      * very next byte and not delivered inline, skip it;
                                      * otherwise copy only up to it.
                                      */
1527                                 if (!urg_offset) 
1528                                 {
1529                                         if (!sk->urginline) 
1530                                         {
1531                                                 ++*seq;
1532                                                 offset++;
1533                                                 used--;
1534                                         }
1535                                 }
1536                                 else
1537                                         used = urg_offset;
1538                         }
1539                 }
1540                 /* Copy it */
                     /* Source starts after the TCP header (doff counts 32-bit words). */
1541                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1542                         skb->h.th->doff*4 + offset, used);
1543                 copied += used;
1544                 len -= used;
1545                 to += used;
1546                 *seq += used;
                     /* Forget the urgent mark once copied_seq+1 is past it. */
1547                 if (after(sk->copied_seq+1,sk->urg_seq))
1548                         sk->urg_data = 0;
1549                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1550                         skb->used = 1;
1551         }
1552         remove_wait_queue(sk->sleep, &wait);
1553         current->state = TASK_RUNNING;
1554 
1555         /* Clean up data we have read: This will do ACK frames */
1556         cleanup_rbuf(sk);
1557         release_sock(sk);
1558         return copied;
1559 }
1560 
1561  
1562 /*
1563  *      Shutdown the sending side of a connection.
      *
      *      sk  - the socket whose send direction is being closed.
      *      how - shutdown mask; the call is a no-op unless SEND_SHUTDOWN
      *            is set in it.
      *
      *      Flags the socket SEND_SHUTDOWN, pushes out any partially built
      *      packet, then builds a FIN segment and either transmits it at
      *      once (write queue empty) or appends it behind the data still
      *      waiting on the write queue.  Finally the state advances:
      *      ESTABLISHED -> FIN_WAIT1, CLOSE_WAIT -> LAST_ACK, anything
      *      else -> FIN_WAIT2.  Nothing is returned; if the FIN buffer
      *      cannot be allocated the function returns with SEND_SHUTDOWN
      *      set but without sending a FIN or changing state.
1564  */
1565 
1566 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1567 {
1568         struct sk_buff *buff;
1569         struct tcphdr *t1, *th;
1570         struct proto *prot;
1571         int tmp;
1572         struct device *dev = NULL;
1573 
1574         /*
1575          * We need to grab some memory, and put together a FIN,
1576          * and then put it into the queue to be sent.
1577          * FIXME:
1578          *
1579          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1580          *      Most of this is guesswork, so maybe it will work...
1581          */
1582 
             /* Only the send direction is handled here. */
1583         if (!(how & SEND_SHUTDOWN)) 
1584                 return;
1585          
1586         /*
1587          *      If we've already sent a FIN, return. 
1588          */
1589          
1590         if (sk->state == TCP_FIN_WAIT1 ||
1591             sk->state == TCP_FIN_WAIT2 ||
1592             sk->state == TCP_CLOSING ||
1593             sk->state == TCP_LAST_ACK ||
1594             sk->state == TCP_TIME_WAIT
1595         ) 
1596         {
1597                 return;
1598         }
             /* Paired with the release_sock() calls below. */
1599         sk->inuse = 1;
1600 
1601         /*
1602          * flag that the sender has shutdown
1603          */
1604 
1605         sk->shutdown |= SEND_SHUTDOWN;
1606 
1607         /*
1608          *  Clear out any half completed packets. 
1609          */
1610 
1611         if (sk->partial)
1612                 tcp_send_partial(sk);
1613 
1614         prot =(struct proto *)sk->prot;
             /* th is the per-socket template header copied into the FIN below. */
1615         th =(struct tcphdr *)&sk->dummy_th;
1616         release_sock(sk); /* in case the malloc sleeps. */
1617         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
             /*
              * Allocation failed: give up silently.  The socket was already
              * released above, so no release_sock() is needed on this path.
              */
1618         if (buff == NULL)
1619                 return;
             /* Re-take the socket now that the allocation is done. */
1620         sk->inuse = 1;
1621 
1622         buff->sk = sk;
1623         buff->len = sizeof(*t1);
1624         buff->localroute = sk->localroute;
1625         t1 =(struct tcphdr *) buff->data;
1626 
1627         /*
1628          *      Put in the IP header and routing stuff. 
              *      build_header() fills in dev and returns the number of
              *      header bytes written, or a negative value on failure.
1629          */
1630 
1631         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1632                            IPPROTO_TCP, sk->opt,
1633                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1634         if (tmp < 0) 
1635         {
1636                 /*
1637                  *      Finish anyway, treat this as a send that got lost. 
1638                  *
1639                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1640                  *      written data to be completely acknowledged along
1641                  *      with an acknowledge to our FIN.
1642                  *
1643                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1644                  *      connection established.
                      *
                      *      NOTE(review): write_seq is not advanced on this
                      *      path although the state moves on as if the FIN
                      *      had been sent; tcp_close() does bump write_seq in
                      *      its equivalent branch -- confirm which is intended.
1645                  */
1646                 buff->free=1;
1647                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1648 
1649                 if (sk->state == TCP_ESTABLISHED)
1650                         sk->state = TCP_FIN_WAIT1;
1651                 else if(sk->state == TCP_CLOSE_WAIT)
1652                         sk->state = TCP_LAST_ACK;
1653                 else
1654                         sk->state = TCP_FIN_WAIT2;
1655 
1656                 release_sock(sk);
1657                 return;
1658         }
1659 
             /* Step past the lower-layer headers to where the TCP header goes. */
1660         t1 =(struct tcphdr *)((char *)t1 +tmp);
1661         buff->len += tmp;
1662         buff->dev = dev;
1663         memcpy(t1, th, sizeof(*t1));
             /*
              * The FIN takes one sequence number: the header carries the
              * current write_seq, buff->h.seq records the value after it.
              * NOTE(review): ntohl()/ntohs() are applied to host-order values
              * here and below; htonl()/htons() looks like what is meant
              * (presumably the same byte swap) -- confirm.
              */
1664         t1->seq = ntohl(sk->write_seq);
1665         sk->write_seq++;
1666         buff->h.seq = sk->write_seq;
1667         t1->ack = 1;
1668         t1->ack_seq = ntohl(sk->acked_seq);
1669         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1670         t1->fin = 1;
1671         t1->rst = 0;
1672         t1->doff = sizeof(*t1)/4;
1673         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1674 
1675         /*
1676          * If there is data in the write queue, the fin must be appended to
1677          * the write queue.
1678          */
1679         
1680         if (skb_peek(&sk->write_queue) != NULL) 
1681         {
1682                 buff->free=0;
                     /* A fresh buffer should not already be linked on a list. */
1683                 if (buff->next != NULL) 
1684                 {
1685                         printk("tcp_shutdown: next != NULL\n");
1686                         skb_unlink(buff);
1687                 }
1688                 skb_queue_tail(&sk->write_queue, buff);
1689         } 
1690         else 
1691         {
                     /* Nothing pending: record the new sent_seq and transmit now. */
1692                 sk->sent_seq = sk->write_seq;
1693                 sk->prot->queue_xmit(sk, dev, buff, 0);
1694         }
1695 
             /* Same state transitions as the build_header() failure path above. */
1696         if (sk->state == TCP_ESTABLISHED) 
1697                 sk->state = TCP_FIN_WAIT1;
1698         else if (sk->state == TCP_CLOSE_WAIT)
1699                 sk->state = TCP_LAST_ACK;
1700         else
1701                 sk->state = TCP_FIN_WAIT2;
1702 
1703         release_sock(sk);
1704 }
1705 
1706 
1707 static int
1708 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1709              int to_len, int nonblock, unsigned flags,
1710              struct sockaddr_in *addr, int *addr_len)
1711 {
1712         int result;
1713   
1714         /* 
1715          *      Have to check these first unlike the old code. If 
1716          *      we check them after we lose data on an error
1717          *      which is wrong 
1718          */
1719 
1720         if(addr_len)
1721                 *addr_len = sizeof(*addr);
1722         result=tcp_read(sk, to, to_len, nonblock, flags);
1723 
1724         if (result < 0) 
1725                 return(result);
1726   
1727         if(addr)
1728         {
1729                 addr->sin_family = AF_INET;
1730                 addr->sin_port = sk->dummy_th.dest;
1731                 addr->sin_addr.s_addr = sk->daddr;
1732         }
1733         return(result);
1734 }
1735 
1736 
1737 /*
1738  *      This routine will send an RST to the other tcp. 
1739  */
1740  
1741 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1742           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1743 {
1744         struct sk_buff *buff;
1745         struct tcphdr *t1;
1746         int tmp;
1747         struct device *ndev=NULL;
1748   
1749 /*
1750  * We need to grab some memory, and put together an RST,
1751  * and then put it into the queue to be sent.
1752  */
1753 
1754         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1755         if (buff == NULL) 
1756                 return;
1757 
1758         buff->len = sizeof(*t1);
1759         buff->sk = NULL;
1760         buff->dev = dev;
1761         buff->localroute = 0;
1762 
1763         t1 =(struct tcphdr *) buff->data;
1764 
1765         /*
1766          *      Put in the IP header and routing stuff. 
1767          */
1768 
1769         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1770                            sizeof(struct tcphdr),tos,ttl);
1771         if (tmp < 0) 
1772         {
1773                 buff->free = 1;
1774                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1775                 return;
1776         }
1777 
1778         t1 =(struct tcphdr *)((char *)t1 +tmp);
1779         buff->len += tmp;
1780         memcpy(t1, th, sizeof(*t1));
1781 
1782         /*
1783          *      Swap the send and the receive. 
1784          */
1785 
1786         t1->dest = th->source;
1787         t1->source = th->dest;
1788         t1->rst = 1;  
1789         t1->window = 0;
1790   
1791         if(th->ack)
1792         {
1793                 t1->ack = 0;
1794                 t1->seq = th->ack_seq;
1795                 t1->ack_seq = 0;
1796         }
1797         else
1798         {
1799                 t1->ack = 1;
1800                 if(!th->syn)
1801                         t1->ack_seq=htonl(th->seq);
1802                 else
1803                         t1->ack_seq=htonl(th->seq+1);
1804                 t1->seq=0;
1805         }
1806 
1807         t1->syn = 0;
1808         t1->urg = 0;
1809         t1->fin = 0;
1810         t1->psh = 0;
1811         t1->doff = sizeof(*t1)/4;
1812         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1813         prot->queue_xmit(NULL, dev, buff, 1);
1814         tcp_statistics.TcpOutSegs++;
1815 }
1816 
1817 
1818 /*
1819  *      Look for tcp options. Parses everything but only knows about MSS.
1820  *      This routine is always called with the packet containing the SYN.
1821  *      However it may also be called with the ack to the SYN.  So you
1822  *      can't assume this is always the SYN.  It's always called after
1823  *      we have set up sk->mtu to our own MTU.
1824  */
1825  
1826 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1827 {
1828         unsigned char *ptr;
1829         int length=(th->doff*4)-sizeof(struct tcphdr);
1830         int mss_seen = 0;
1831     
1832         ptr = (unsigned char *)(th + 1);
1833   
1834         while(length>0)
1835         {
1836                 int opcode=*ptr++;
1837                 int opsize=*ptr++;
1838                 switch(opcode)
1839                 {
1840                         case TCPOPT_EOL:
1841                                 return;
1842                         case TCPOPT_NOP:
1843                                 length-=2;
1844                                 continue;
1845                         
1846                         default:
1847                                 if(opsize<=2)   /* Avoid silly options looping forever */
1848                                         return;
1849                                 switch(opcode)
1850                                 {
1851                                         case TCPOPT_MSS:
1852                                                 if(opsize==4 && th->syn)
1853                                                 {
1854                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1855                                                         mss_seen = 1;
1856                                                 }
1857                                                 break;
1858                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1859                                 }
1860                                 ptr+=opsize-2;
1861                                 length-=opsize;
1862                 }
1863         }
1864         if (th->syn) 
1865         {
1866                 if (! mss_seen)
1867                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1868         }
1869 #ifdef CONFIG_INET_PCTCP
1870         sk->mss = min(sk->max_window >> 1, sk->mtu);
1871 #else    
1872         sk->mss = min(sk->max_window, sk->mtu);
1873 #endif  
1874 }
1875 
/*
 *      Return the classful network mask for an address.
 *
 *      dst is given in network byte order and the mask is returned in
 *      network byte order.  Class A and class B addresses get their
 *      class mask; everything else gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
        unsigned long host_addr = ntohl(dst);
        unsigned long net_bits;

        if (IN_CLASSA(host_addr))
                net_bits = IN_CLASSA_NET;
        else if (IN_CLASSB(host_addr))
                net_bits = IN_CLASSB_NET;
        else
                net_bits = IN_CLASSC_NET;

        return htonl(net_bits);
}
1885 
1886 /*
1887  *      This routine handles a connection request.
1888  *      It should make sure we haven't already responded.
1889  *      Because of the way BSD works, we have to send a syn/ack now.
1890  *      This also means it will be harder to close a socket which is
1891  *      listening.
1892  */
1893  
1894 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1895                  unsigned long daddr, unsigned long saddr,
1896                  struct options *opt, struct device *dev)
1897 {
1898         struct sk_buff *buff;
1899         struct tcphdr *t1;
1900         unsigned char *ptr;
1901         struct sock *newsk;
1902         struct tcphdr *th;
1903         struct device *ndev=NULL;
1904         int tmp;
1905         struct rtable *rt;
1906   
1907         th = skb->h.th;
1908 
1909         /* If the socket is dead, don't accept the connection. */
1910         if (!sk->dead) 
1911         {
1912                 sk->data_ready(sk,0);
1913         }
1914         else 
1915         {
1916                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1917                 tcp_statistics.TcpAttemptFails++;
1918                 kfree_skb(skb, FREE_READ);
1919                 return;
1920         }
1921 
1922         /*
1923          * Make sure we can accept more.  This will prevent a
1924          * flurry of syns from eating up all our memory.
1925          */
1926 
1927         if (sk->ack_backlog >= sk->max_ack_backlog) 
1928         {
1929                 tcp_statistics.TcpAttemptFails++;
1930                 kfree_skb(skb, FREE_READ);
1931                 return;
1932         }
1933 
1934         /*
1935          * We need to build a new sock struct.
1936          * It is sort of bad to have a socket without an inode attached
1937          * to it, but the wake_up's will just wake up the listening socket,
1938          * and if the listening socket is destroyed before this is taken
1939          * off of the queue, this will take care of it.
1940          */
1941 
1942         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1943         if (newsk == NULL) 
1944         {
1945                 /* just ignore the syn.  It will get retransmitted. */
1946                 tcp_statistics.TcpAttemptFails++;
1947                 kfree_skb(skb, FREE_READ);
1948                 return;
1949         }
1950 
1951         memcpy(newsk, sk, sizeof(*newsk));
1952         skb_queue_head_init(&newsk->write_queue);
1953         skb_queue_head_init(&newsk->receive_queue);
1954         newsk->send_head = NULL;
1955         newsk->send_tail = NULL;
1956         skb_queue_head_init(&newsk->back_log);
1957         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
1958         newsk->rto = TCP_TIMEOUT_INIT;
1959         newsk->mdev = 0;
1960         newsk->max_window = 0;
1961         newsk->cong_window = 1;
1962         newsk->cong_count = 0;
1963         newsk->ssthresh = 0;
1964         newsk->backoff = 0;
1965         newsk->blog = 0;
1966         newsk->intr = 0;
1967         newsk->proc = 0;
1968         newsk->done = 0;
1969         newsk->partial = NULL;
1970         newsk->pair = NULL;
1971         newsk->wmem_alloc = 0;
1972         newsk->rmem_alloc = 0;
1973         newsk->localroute = sk->localroute;
1974 
1975         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1976 
1977         newsk->err = 0;
1978         newsk->shutdown = 0;
1979         newsk->ack_backlog = 0;
1980         newsk->acked_seq = skb->h.th->seq+1;
1981         newsk->fin_seq = skb->h.th->seq;
1982         newsk->copied_seq = skb->h.th->seq;
1983         newsk->state = TCP_SYN_RECV;
1984         newsk->timeout = 0;
1985         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
1986         newsk->window_seq = newsk->write_seq;
1987         newsk->rcv_ack_seq = newsk->write_seq;
1988         newsk->urg_data = 0;
1989         newsk->retransmits = 0;
1990         newsk->destroy = 0;
1991         init_timer(&newsk->timer);
1992         newsk->timer.data = (unsigned long)newsk;
1993         newsk->timer.function = &net_timer;
1994         newsk->dummy_th.source = skb->h.th->dest;
1995         newsk->dummy_th.dest = skb->h.th->source;
1996         
1997         /*
1998          *      Swap these two, they are from our point of view. 
1999          */
2000          
2001         newsk->daddr = saddr;
2002         newsk->saddr = daddr;
2003 
2004         put_sock(newsk->num,newsk);
2005         newsk->dummy_th.res1 = 0;
2006         newsk->dummy_th.doff = 6;
2007         newsk->dummy_th.fin = 0;
2008         newsk->dummy_th.syn = 0;
2009         newsk->dummy_th.rst = 0;        
2010         newsk->dummy_th.psh = 0;
2011         newsk->dummy_th.ack = 0;
2012         newsk->dummy_th.urg = 0;
2013         newsk->dummy_th.res2 = 0;
2014         newsk->acked_seq = skb->h.th->seq + 1;
2015         newsk->copied_seq = skb->h.th->seq;
2016 
2017         /*
2018          *      Grab the ttl and tos values and use them 
2019          */
2020 
2021         newsk->ip_ttl=sk->ip_ttl;
2022         newsk->ip_tos=skb->ip_hdr->tos;
2023 
2024         /*
2025          *      Use 512 or whatever user asked for 
2026          */
2027 
2028         /*
2029          *      Note use of sk->user_mss, since user has no direct access to newsk 
2030          */
2031 
2032         rt=ip_rt_route(saddr, NULL,NULL);
2033         
2034         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2035                 newsk->window_clamp = rt->rt_window;
2036         else
2037                 newsk->window_clamp = 0;
2038                 
2039         if (sk->user_mss)
2040                 newsk->mtu = sk->user_mss;
2041         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2042                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2043         else 
2044         {
2045 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2046                 if ((saddr ^ daddr) & default_mask(saddr))
2047 #else
2048                 if ((saddr ^ daddr) & dev->pa_mask)
2049 #endif
2050                         newsk->mtu = 576 - HEADER_SIZE;
2051                 else
2052                         newsk->mtu = MAX_WINDOW;
2053         }
2054 
2055         /*
2056          *      But not bigger than device MTU 
2057          */
2058 
2059         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2060 
2061         /*
2062          *      This will min with what arrived in the packet 
2063          */
2064 
2065         tcp_options(newsk,skb->h.th);
2066 
2067         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2068         if (buff == NULL) 
2069         {
2070                 sk->err = -ENOMEM;
2071                 newsk->dead = 1;
2072                 release_sock(newsk);
2073                 kfree_skb(skb, FREE_READ);
2074                 tcp_statistics.TcpAttemptFails++;
2075                 return;
2076         }
2077   
2078         buff->len = sizeof(struct tcphdr)+4;
2079         buff->sk = newsk;
2080         buff->localroute = newsk->localroute;
2081 
2082         t1 =(struct tcphdr *) buff->data;
2083 
2084         /*
2085          *      Put in the IP header and routing stuff. 
2086          */
2087 
2088         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2089                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2090 
2091         /*
2092          *      Something went wrong. 
2093          */
2094 
2095         if (tmp < 0) 
2096         {
2097                 sk->err = tmp;
2098                 buff->free=1;
2099                 kfree_skb(buff,FREE_WRITE);
2100                 newsk->dead = 1;
2101                 release_sock(newsk);
2102                 skb->sk = sk;
2103                 kfree_skb(skb, FREE_READ);
2104                 tcp_statistics.TcpAttemptFails++;
2105                 return;
2106         }
2107 
2108         buff->len += tmp;
2109         t1 =(struct tcphdr *)((char *)t1 +tmp);
2110   
2111         memcpy(t1, skb->h.th, sizeof(*t1));
2112         buff->h.seq = newsk->write_seq;
2113         /*
2114          *      Swap the send and the receive. 
2115          */
2116         t1->dest = skb->h.th->source;
2117         t1->source = newsk->dummy_th.source;
2118         t1->seq = ntohl(newsk->write_seq++);
2119         t1->ack = 1;
2120         newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2121         newsk->sent_seq = newsk->write_seq;
2122         t1->window = ntohs(newsk->window);
2123         t1->res1 = 0;
2124         t1->res2 = 0;
2125         t1->rst = 0;
2126         t1->urg = 0;
2127         t1->psh = 0;
2128         t1->syn = 1;
2129         t1->ack_seq = ntohl(skb->h.th->seq+1);
2130         t1->doff = sizeof(*t1)/4+1;
2131         ptr =(unsigned char *)(t1+1);
2132         ptr[0] = 2;
2133         ptr[1] = 4;
2134         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2135         ptr[3] =(newsk->mtu) & 0xff;
2136 
2137         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2138         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2139 
2140         reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
2141         skb->sk = newsk;
2142 
2143         /*
2144          *      Charge the sock_buff to newsk. 
2145          */
2146          
2147         sk->rmem_alloc -= skb->mem_len;
2148         newsk->rmem_alloc += skb->mem_len;
2149         
2150         skb_queue_tail(&sk->receive_queue,skb);
2151         sk->ack_backlog++;
2152         release_sock(newsk);
2153         tcp_statistics.TcpOutSegs++;
2154 }
2155 
2156 
2157 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2158 {
2159         struct sk_buff *buff;
2160         int need_reset = 0;
2161         struct tcphdr *t1, *th;
2162         struct proto *prot;
2163         struct device *dev=NULL;
2164         int tmp;
2165 
2166         /*
2167          * We need to grab some memory, and put together a FIN, 
2168          * and then put it into the queue to be sent.
2169          */
2170         sk->inuse = 1;
2171         sk->keepopen = 1;
2172         sk->shutdown = SHUTDOWN_MASK;
2173 
2174         if (!sk->dead) 
2175                 sk->state_change(sk);
2176 
2177         /*
2178          *      We need to flush the recv. buffs. 
2179          */
2180 
2181         if (skb_peek(&sk->receive_queue) != NULL) 
2182         {
2183                 struct sk_buff *skb;
2184                 if(sk->debug)
2185                         printk("Clean rcv queue\n");
2186                 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2187                 {
2188                         /* The +1 is not needed because the FIN takes up sequence space and
2189                            is not read!!! */
2190                         if(skb->len > 0 && after(skb->h.th->seq + skb->len/* + 1 */ , sk->copied_seq))
2191                                 need_reset = 1;
2192                         kfree_skb(skb, FREE_READ);
2193                 }
2194                 if(sk->debug)
2195                         printk("Cleaned.\n");
2196         }
2197 
2198         /*
2199          *      Get rid off any half-completed packets. 
2200          */
2201          
2202         if (sk->partial) 
2203         {
2204                 tcp_send_partial(sk);
2205         }
2206 
2207         switch(sk->state) 
2208         {
2209                 case TCP_FIN_WAIT1:
2210                 case TCP_FIN_WAIT2:
2211                 case TCP_CLOSING:
2212                         /*
2213                          * These states occur when we have already closed out
2214                          * our end.  If there is no timeout, we do not do
2215                          * anything.  We may still be in the middle of sending
2216                          * the remainder of our buffer, for example...
2217                          * resetting the timer would be inappropriate.
2218                          *
2219                          * XXX if retransmit count reaches limit, is tcp_close()
2220                          * called with timeout == 1 ? if not, we need to fix that.
2221                          */
2222                         if (!timeout) {
2223                                 int timer_active;
2224 
2225                                 timer_active = del_timer(&sk->timer);
2226                                 if (timer_active)
2227                                         add_timer(&sk->timer);
2228                                 else
2229                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2230                         }
2231 #ifdef NOTDEF
2232                         /* 
2233                          *      Start a timer.
2234                          * original code was 4 * sk->rtt.  In converting to the
2235                          * new rtt representation, we can't quite use that.
2236                          * it seems to make most sense to  use the backed off value
2237                          */
2238                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2239 #endif
2240                         if (timeout) 
2241                                 tcp_time_wait(sk);
2242                         release_sock(sk);
2243                         return; /* break causes a double release - messy */
2244                 case TCP_TIME_WAIT:
2245                 case TCP_LAST_ACK:
2246                         /*
2247                          * A timeout from these states terminates the TCB.
2248                          */
2249                         if (timeout) 
2250                         {
2251                                 sk->state = TCP_CLOSE;
2252                         }
2253                         release_sock(sk);
2254                         return;
2255                 case TCP_LISTEN:
2256                         sk->state = TCP_CLOSE;
2257                         release_sock(sk);
2258                         return;
2259                 case TCP_CLOSE:
2260                         release_sock(sk);
2261                         return;
2262                 case TCP_CLOSE_WAIT:
2263                 case TCP_ESTABLISHED:
2264                 case TCP_SYN_SENT:
2265                 case TCP_SYN_RECV:
2266                         prot =(struct proto *)sk->prot;
2267                         th =(struct tcphdr *)&sk->dummy_th;
2268                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2269                         if (buff == NULL) 
2270                         {
2271                                 /* This will force it to try again later. */
2272                                 /* Or it would have if someone released the socket
2273                                    first. Anyway it might work now */
2274                                 release_sock(sk);
2275                                 if (sk->state != TCP_CLOSE_WAIT)
2276                                         sk->state = TCP_ESTABLISHED;
2277                                 reset_timer(sk, TIME_CLOSE, 100);
2278                                 return;
2279                         }
2280                         buff->sk = sk;
2281                         buff->free = 1;
2282                         buff->len = sizeof(*t1);
2283                         buff->localroute = sk->localroute;
2284                         t1 =(struct tcphdr *) buff->data;
2285         
2286                         /*
2287                          *      Put in the IP header and routing stuff. 
2288                          */
2289                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2290                                          IPPROTO_TCP, sk->opt,
2291                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2292                         if (tmp < 0) 
2293                         {
2294                                 sk->write_seq++;        /* Very important 8) */
2295                                 kfree_skb(buff,FREE_WRITE);
2296 
2297                                 /*
2298                                  * Enter FIN_WAIT1 to await completion of
2299                                  * written out data and ACK to our FIN.
2300                                  */
2301 
2302                                 if(sk->state==TCP_ESTABLISHED)
2303                                         sk->state=TCP_FIN_WAIT1;
2304                                 else
2305                                         sk->state=TCP_FIN_WAIT2;
2306                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2307                                 if(timeout)
2308                                         tcp_time_wait(sk);
2309 
2310                                 release_sock(sk);
2311                                 return;
2312                         }
2313 
2314                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2315                         buff->len += tmp;
2316                         buff->dev = dev;
2317                         memcpy(t1, th, sizeof(*t1));
2318                         t1->seq = ntohl(sk->write_seq);
2319                         sk->write_seq++;
2320                         buff->h.seq = sk->write_seq;
2321                         t1->ack = 1;
2322         
2323                         /* 
2324                          *      Ack everything immediately from now on. 
2325                          */
2326 
2327                         sk->delay_acks = 0;
2328                         t1->ack_seq = ntohl(sk->acked_seq);
2329                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2330                         t1->fin = 1;
2331                         t1->rst = need_reset;
2332                         t1->doff = sizeof(*t1)/4;
2333                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2334 
2335                         tcp_statistics.TcpOutSegs++;
2336         
2337                         if (skb_peek(&sk->write_queue) == NULL) 
2338                         {
2339                                 sk->sent_seq = sk->write_seq;
2340                                 prot->queue_xmit(sk, dev, buff, 0);
2341                         } 
2342                         else 
2343                         {
2344                                 reset_timer(sk, TIME_WRITE, sk->rto);
2345                                 if (buff->next != NULL) 
2346                                 {
2347                                         printk("tcp_close: next != NULL\n");
2348                                         skb_unlink(buff);
2349                                 }
2350                                 skb_queue_tail(&sk->write_queue, buff);
2351                         }
2352 
2353                         /*
2354                          * If established (normal close), enter FIN_WAIT1.
2355                          * If in CLOSE_WAIT, enter LAST_ACK
2356                          * If in CLOSING, remain in CLOSING
2357                          * otherwise enter FIN_WAIT2
2358                          */
2359 
2360                         if (sk->state == TCP_ESTABLISHED)
2361                             sk->state = TCP_FIN_WAIT1;
2362                         else if (sk->state == TCP_CLOSE_WAIT)
2363                             sk->state = TCP_LAST_ACK;
2364                         else if (sk->state != TCP_CLOSING)
2365                             sk->state = TCP_FIN_WAIT2;
2366         }
2367         release_sock(sk);
2368 }
2369 
2370 
2371 /*
2372  * This routine takes stuff off of the write queue,
2373  * and puts it in the xmit queue.
2374  */
2375 static void
2376 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2377 {
2378         struct sk_buff *skb;
2379 
2380         /*
2381          *      The bytes will have to remain here. In time closedown will
2382          *      empty the write queue and all will be happy 
2383          */
2384 
2385         if(sk->zapped)
2386                 return;
2387 
2388         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2389                 before(skb->h.seq, sk->window_seq + 1) &&
2390                 (sk->retransmits == 0 ||
2391                  sk->timeout != TIME_WRITE ||
2392                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2393                 && sk->packets_out < sk->cong_window) 
2394         {
2395                 IS_SKB(skb);
2396                 skb_unlink(skb);
2397                 /* See if we really need to send the packet. */
2398                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2399                 {
2400                         sk->retransmits = 0;
2401                         kfree_skb(skb, FREE_WRITE);
2402                         if (!sk->dead) 
2403                                 sk->write_space(sk);
2404                 } 
2405                 else
2406                 {
2407                         struct tcphdr *th;
2408                         struct iphdr *iph;
2409                         int size;
2410 /*
2411  * put in the ack seq and window at this point rather than earlier,
2412  * in order to keep them monotonic.  We really want to avoid taking
2413  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2414  * Ack and window will in general have changed since this packet was put
2415  * on the write queue.
2416  */
2417                         iph = (struct iphdr *)(skb->data +
2418                                                skb->dev->hard_header_len);
2419                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2420                         size = skb->len - (((unsigned char *) th) - skb->data);
2421                         
2422                         th->ack_seq = ntohl(sk->acked_seq);
2423                         th->window = ntohs(tcp_select_window(sk));
2424 
2425                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2426 
2427                         sk->sent_seq = skb->h.seq;
2428                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2429                 }
2430         }
2431 }
2432 
2433 
2434 /*
2435  *      This routine sorts the send list, and resets the
2436  *      sk->send_head and sk->send_tail pointers.
2437  */
2438 
2439 static void sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2440 {
2441         struct sk_buff *list = NULL;
2442         struct sk_buff *skb,*skb2,*skb3;
2443 
2444         for (skb = sk->send_head; skb != NULL; skb = skb2) 
2445         {
2446                 skb2 = skb->link3;
2447                 if (list == NULL || before (skb2->h.seq, list->h.seq)) 
2448                 {
2449                         skb->link3 = list;
2450                         sk->send_tail = skb;
2451                         list = skb;
2452                 }
2453                 else
2454                 {
2455                         for (skb3 = list; ; skb3 = skb3->link3) 
2456                         {
2457                                 if (skb3->link3 == NULL ||
2458                                     before(skb->h.seq, skb3->link3->h.seq))
2459                                 {
2460                                         skb->link3 = skb3->link3;
2461                                         skb3->link3 = skb;
2462                                         if (skb->link3 == NULL) 
2463                                                 sk->send_tail = skb;
2464                                         break;
2465                                 }
2466                         }
2467                 }
2468         }
2469         sk->send_head = list;
2470 }
2471   
2472 
2473 /*
2474  *      This routine deals with incoming acks, but not outgoing ones.
      *
      *      sk    - socket the segment belongs to
      *      th    - TCP header of the received segment; ack_seq, window and
      *              doff are read from it
      *      saddr - not referenced anywhere in this function
      *      len   - compared against th->doff*4; a mismatch sets flag bit 1
      *
      *      Returns 0 when the ack number is rejected (it is after
      *      sk->sent_seq or before sk->rcv_ack_seq) and 1 in every other
      *      case, including the early exit for a zapped socket.
      *
      *      In order, the function: updates max_window/mss, handles a
      *      shrunken window, opens the congestion window, records the ack,
      *      clears probe backoff, frees acked buffers from the retransmit
      *      list (sampling RTT), restarts output or re-arms timers, advances
      *      the LAST_ACK / FIN_WAIT1 / CLOSING states and finally decides
      *      whether to retransmit.
2475  */
2476 
2477 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2478 {
2479         unsigned long ack;
2480         int flag = 0;
2481 
2482         /* 
2483          * 1 - there was data in packet as well as ack or new data is sent or 
2484          *     in shutdown state
2485          * 2 - data from retransmit queue was acked and removed
2486          * 4 - window shrunk or data from retransmit queue was acked and removed
2487          */
2488 
2489         if(sk->zapped)
2490                 return(1);      /* Dead, cant ack any more so why bother */
2491 
2492         ack = ntohl(th->ack_seq);
             /*
              * Remember the largest window the peer has ever advertised and
              * re-derive mss from it, bounded by the mtu.
              */
2493         if (ntohs(th->window) > sk->max_window) 
2494         {
2495                 sk->max_window = ntohs(th->window);
2496 #ifdef CONFIG_INET_PCTCP
2497                 sk->mss = min(sk->max_window>>1, sk->mtu);
2498 #else
2499                 sk->mss = min(sk->max_window, sk->mtu);
2500 #endif  
2501         }
2502 
2503         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2504                 sk->retransmits = 0;
2505 
2506 #if 0
2507 /*
2508  *      Not quite clear why the +1 and -1 here, and why not +1 in next line 
2509  */
2510  
2511         if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) 
2512 #else   
2513         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2514 #endif  
2515         {
2516                 if(sk->debug)
2517                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2518                 /*
2519                  * What is all this crap? the ack sequence number is bad or
2520                  * old, we should return 0 to ignore the packet. XXX
2521                  */
2522                 return(0);
                     /*
                      * The NOTDEF section below follows the unconditional
                      * return above, so it could not execute even if NOTDEF
                      * were defined.
                      */
2523 #ifdef NOTDEF
2524                 if (after(ack, sk->sent_seq) ||
2525                    (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2526                 {
2527                         return(0);
2528                 }
2529                 if (sk->keepopen) 
2530                 {
2531                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2532                 }
2533                 return(1);
2534 #endif
2535         }
2536 
             /* Segment length differs from the header length: flag bit 1. */
2537         if (len != th->doff*4) 
2538                 flag |= 1;
2539 
2540         /* See if our window has been shrunk. */
2541 
2542         if (after(sk->window_seq, ack+ntohs(th->window))) 
2543         {
2544                 /*
2545                  * We may need to move packets from the send queue
2546                  * to the write queue, if the window has been shrunk on us.
2547                  * The RFC says you are not allowed to shrink your window
2548                  * like this, but if the other end does, you must be able
2549                  * to deal with it.
2550                  */
2551                 struct sk_buff *skb;
2552                 struct sk_buff *skb2;
2553                 struct sk_buff *wskb = NULL;
2554         
                     /* Detach the whole retransmit list; it is rebuilt below. */
2555                 skb2 = sk->send_head;
2556                 sk->send_head = NULL;
2557                 sk->send_tail = NULL;
2558         
2559                 flag |= 4;
2560         
2561                 sk->window_seq = ack + ntohs(th->window);
                     /*
                      * With interrupts disabled (cli/sti), walk the detached
                      * list.  Buffers whose h.seq is after the new window edge
                      * are put back on write_queue (first one at the head,
                      * later ones appended behind the previous one, tracked
                      * in wskb); the others are re-linked onto
                      * send_head/send_tail in their existing order.
                      */
2562                 cli();
2563                 while (skb2 != NULL) 
2564                 {
2565                         skb = skb2;
2566                         skb2 = skb->link3;
2567                         skb->link3 = NULL;
2568                         if (after(skb->h.seq, sk->window_seq)) 
2569                         {
2570                                 if (sk->packets_out > 0) 
2571                                         sk->packets_out--;
2572                                 /* We may need to remove this from the dev send list. */
2573                                 if (skb->next != NULL) 
2574                                 {
2575                                         skb_unlink(skb);                                
2576                                 }
2577                                 /* Now add it to the write_queue. */
2578                                 if (wskb == NULL)
2579                                         skb_queue_head(&sk->write_queue,skb);
2580                                 else
2581                                         skb_append(wskb,skb);
2582                                 wskb = skb;
2583                         } 
2584                         else 
2585                         {
2586                                 if (sk->send_head == NULL) 
2587                                 {
2588                                         sk->send_head = skb;
2589                                         sk->send_tail = skb;
2590                                 }
2591                                 else
2592                                 {
2593                                         sk->send_tail->link3 = skb;
2594                                         sk->send_tail = skb;
2595                                 }
2596                                 skb->link3 = NULL;
2597                         }
2598                 }
2599                 sti();
2600         }
2601 
             /*
              * If either end pointer of the retransmit list is NULL, treat
              * the list as empty: clear both pointers and packets_out.
              */
2602         if (sk->send_tail == NULL || sk->send_head == NULL) 
2603         {
2604                 sk->send_head = NULL;
2605                 sk->send_tail = NULL;
2606                 sk->packets_out= 0;
2607         }
2608 
             /* Set the window edge from this ack whether or not it shrank. */
2609         sk->window_seq = ack + ntohs(th->window);
2610 
2611         /* We don't want too many packets out there. */
             /*
              * Note the test uses the old rcv_ack_seq (it is only updated
              * further down), so the window grows only for acks that advance.
              */
2612         if (sk->timeout == TIME_WRITE && 
2613                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2614         {
2615 /* 
2616  * This is Jacobson's slow start and congestion avoidance. 
2617  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2618  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2619  * counter and increment it once every cwnd times.  It's possible
2620  * that this should be done only if sk->retransmits == 0.  I'm
2621  * interpreting "new data is acked" as including data that has
2622  * been retransmitted but is just now being acked.
2623  */
2624                 if (sk->cong_window < sk->ssthresh)  
2625                   /* 
2626                    *    In "safe" area, increase
2627                    */
2628                         sk->cong_window++;
2629                 else 
2630                 {
2631                   /*
2632                    *    In dangerous area, increase slowly.  In theory this is
2633                    *    sk->cong_window += 1 / sk->cong_window
2634                    */
2635                         if (sk->cong_count >= sk->cong_window) 
2636                         {
2637                                 sk->cong_window++;
2638                                 sk->cong_count = 0;
2639                         }
2640                         else 
2641                                 sk->cong_count++;
2642                 }
2643         }
2644 
             /* Record the new highest acknowledged sequence number. */
2645         sk->rcv_ack_seq = ack;
2646 
2647         /*
2648          * if this ack opens up a zero window, clear backoff.  It was
2649          * being used to time the probes, and is probably far higher than
2650          * it needs to be for normal retransmission.
2651          */
2652 
2653         if (sk->timeout == TIME_PROBE0) 
2654         {
2655                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2656                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2657                 {
2658                         sk->retransmits = 0;
2659                         sk->backoff = 0;
2660                   /*
2661                    *    Recompute rto from rtt.  this eliminates any backoff.
2662                    */
2663 
2664                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2665                         if (sk->rto > 120*HZ)
2666                                 sk->rto = 120*HZ;
2667                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2668                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2669                                                    .2 of a second is going to need huge windows (SIGH) */
2670                                 sk->rto = 20;
2671                 }
2672         }
2673 
2674   /* 
2675    *    See if we can take anything off of the retransmit queue.
2676    */
2677    
             /*
              * Each pass frees the head of the retransmit list if its h.seq
              * is at or before the ack; the loop stops at the first buffer
              * that is not yet acknowledged.
              */
2678         while(sk->send_head != NULL) 
2679         {
2680                 /* Check for a bug. */
2681                 if (sk->send_head->link3 &&
2682                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2683                 {
2684                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2685                         sort_send(sk);
2686                 }
2687 
2688                 if (before(sk->send_head->h.seq, ack+1)) 
2689                 {
2690                         struct sk_buff *oskb;   
2691                         if (sk->retransmits) 
2692                         {       
2693                                 /*
2694                                  *      We were retransmitting.  don't count this in RTT est 
2695                                  */
2696                                 flag |= 2;
2697 
2698                                 /*
2699                                  * even though we've gotten an ack, we're still
2700                                  * retransmitting as long as we're sending from
2701                                  * the retransmit queue.  Keeping retransmits non-zero
2702                                  * prevents us from getting new data interspersed with
2703                                  * retransmissions.
2704                                  */
2705 
2706                                 if (sk->send_head->link3)
2707                                         sk->retransmits = 1;
2708                                 else
2709                                         sk->retransmits = 0;
2710                         }
2711                         /*
2712                          * Note that we only reset backoff and rto in the
2713                          * rtt recomputation code.  And that doesn't happen
2714                          * if there were retransmissions in effect.  So the
2715                          * first new packet after the retransmissions is
2716                          * sent with the backoff still in effect.  Not until
2717                          * we get an ack from a non-retransmitted packet do
2718                          * we reset the backoff and rto.  This allows us to deal
2719                          * with a situation where the network delay has increased
2720                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2721                          */
2722 
2723                         /*
2724                          *      We have one less packet out there. 
2725                          */
2726                          
2727                         if (sk->packets_out > 0) 
2728                                 sk->packets_out --;
2729                         /* 
2730                          *      Wake up the process, it can probably write more. 
2731                          */
2732                         if (!sk->dead) 
2733                                 sk->write_space(sk);
2734                         oskb = sk->send_head;
2735 
                             /*
                              * flag bit 2 is set either just above (we were
                              * retransmitting) or after a buffer has been
                              * freed further down, so at most the first
                              * buffer acked by this segment yields an RTT
                              * sample.
                              */
2736                         if (!(flag&2)) 
2737                         {
2738                                 long m;
2739         
2740                                 /*
2741                                  *      The following amusing code comes from Jacobson's
2742                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2743                                  *      are scaled versions of rtt and mean deviation.
2744                                  *      This is designed to be as fast as possible 
2745                                  *      m stands for "measurement".
2746                                  */
2747         
2748                                 m = jiffies - oskb->when;  /* RTT */
2749                                 if(m<=0)
2750                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2751                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2752                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2753                                 if (m < 0)
2754                                         m = -m;         /* m is now abs(error) */
2755                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2756                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2757         
2758                                 /*
2759                                  *      Now update timeout.  Note that this removes any backoff.
2760                                  */
2761                          
2762                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2763                                 if (sk->rto > 120*HZ)
2764                                         sk->rto = 120*HZ;
2765                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2766                                         sk->rto = 20;
2767                                 sk->backoff = 0;
2768                         }
2769                         flag |= (2|4);
                             /*
                              * Unlink the head buffer from the retransmit list
                              * (and from any other list it sits on) with
                              * interrupts off, then free it.
                              */
2770                         cli();
2771                         oskb = sk->send_head;
2772                         IS_SKB(oskb);
2773                         sk->send_head = oskb->link3;
2774                         if (sk->send_head == NULL) 
2775                         {
2776                                 sk->send_tail = NULL;
2777                         }
2778 
2779                 /*
2780                  *      We may need to remove this from the dev send list. 
2781                  */
2782 
2783                         if (oskb->next)
2784                                 skb_unlink(oskb);
2785                         sti();
2786                         kfree_skb(oskb, FREE_WRITE); /* write. */
2787                         if (!sk->dead) 
2788                                 sk->write_space(sk);
2789                 }
2790                 else
2791                 {
2792                         break;
2793                 }
2794         }
2795 
2796         /*
2797          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2798          * returns non-NULL, we completely ignore the timer stuff in the else
2799          * clause.  We ought to organize the code so that else clause can
2800          * (should) be executed regardless, possibly moving the PROBE timer
2801          * reset over.  The skb_peek() thing should only move stuff to the
2802          * write queue, NOT also manage the timer functions.
2803          */
2804 
2805         /*
2806          * Maybe we can take some stuff off of the write queue,
2807          * and put it onto the xmit queue.
2808          */
2809         if (skb_peek(&sk->write_queue) != NULL) 
2810         {
                     /*
                      * First branch: the head of write_queue passes the
                      * window, retransmit-state and cong_window tests, so
                      * hand over to tcp_write_xmit().  Second branch: the
                      * head lies beyond the window and nothing else is
                      * pending, so arm the TIME_PROBE0 timer.
                      */
2811                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2812                         (sk->retransmits == 0 || 
2813                          sk->timeout != TIME_WRITE ||
2814                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2815                         && sk->packets_out < sk->cong_window) 
2816                 {
2817                         flag |= 1;
2818                         tcp_write_xmit(sk);
2819                 }
2820                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2821                         sk->send_head == NULL &&
2822                         sk->ack_backlog == 0 &&
2823                         sk->state != TCP_TIME_WAIT) 
2824                 {
2825                         reset_timer(sk, TIME_PROBE0, sk->rto);
2826                 }               
2827         }
2828         else
2829         {
2830                 /*
2831                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2832                  * from TCP_CLOSE we don't do anything
2833                  *
2834                  * from anything else, if there is write data (or fin) pending,
2835                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2836                  * a KEEPALIVE timeout, else we delete the timer.
2837                  *
2838                  * We do not set flag for nominal write data, otherwise we may
2839                  * force a state where we start to write itsy bitsy tidbits
2840                  * of data.
2841                  */
2842 
2843                 switch(sk->state) {
2844                 case TCP_TIME_WAIT:
2845                         /*
2846                          * keep us in TIME_WAIT until we stop getting packets,
2847                          * reset the timeout.
2848                          */
2849                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2850                         break;
2851                 case TCP_CLOSE:
2852                         /*
2853                          * don't touch the timer.
2854                          */
2855                         break;
2856                 default:
2857                         /*
2858                          * must check send_head, write_queue, and ack_backlog
2859                          * to determine which timeout to use.
2860                          */
2861                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2862                                 reset_timer(sk, TIME_WRITE, sk->rto);
2863                         } else if (sk->keepopen) {
2864                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2865                         } else {
2866                                 delete_timer(sk);
2867                         }
2868                         break;
2869                 }
2870 #ifdef NOTDEF
2871                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2872                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2873                 {
2874                         if (!sk->dead)
2875                                 sk->write_space(sk);
2876                         if (sk->keepopen) {
2877                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2878                         } else {
2879                                 delete_timer(sk);
2880                         }
2881                 }
2882                 else
2883                 {
2884                         if (sk->state != (unsigned char) sk->keepopen) 
2885                         {
2886                                 reset_timer(sk, TIME_WRITE, sk->rto);
2887                         }
2888                         if (sk->state == TCP_TIME_WAIT) 
2889                         {
2890                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2891                         }       
2892                 }
2893 #endif
2894         }
2895 
             /*
              * Nothing outstanding and nothing queued, but a partial buffer
              * is pending: send it now.
              */
2896         if (sk->packets_out == 0 && sk->partial != NULL &&
2897                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2898         {
2899                 flag |= 1;
2900                 tcp_send_partial(sk);
2901         }
2902 
2903         /*
2904          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2905          * we are now waiting for an acknowledge to our FIN.  The other end is
2906          * already in TIME_WAIT.
2907          *
2908          * Move to TCP_CLOSE on success.
2909          */
2910 
2911         if (sk->state == TCP_LAST_ACK) 
2912         {
                     /* Note: state_change() is called before the state is updated. */
2913                 if (!sk->dead)
2914                         sk->state_change(sk);
2915                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2916                 {
2917                         flag |= 1;
2918                         sk->state = TCP_CLOSE;
2919                         sk->shutdown = SHUTDOWN_MASK;
2920                 }
2921         }
2922 
2923         /*
2924          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2925          *
2926          * Move to FIN_WAIT2 to await a FIN from the other end.
2927          */
2928 
2929         if (sk->state == TCP_FIN_WAIT1) 
2930         {
2931 
2932                 if (!sk->dead) 
2933                         sk->state_change(sk);
2934                 if (sk->rcv_ack_seq == sk->write_seq) 
2935                 {
2936                         flag |= 1;
                             /*
                              * NOTE(review): as written, TIME_WAIT is entered
                              * when acked_seq differs from fin_seq and
                              * FIN_WAIT2 when they match - confirm this is the
                              * intended sense of the comparison.
                              */
2937                         if (sk->acked_seq != sk->fin_seq) 
2938                         {
2939                                 tcp_time_wait(sk);
2940                         }
2941                         else
2942                         {
2943                                 sk->shutdown = SHUTDOWN_MASK;
2944                                 sk->state = TCP_FIN_WAIT2;
2945                         }
2946                 }
2947         }
2948 
2949         /*
2950          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2951          *
2952          *      Move to TIME_WAIT
2953          */
2954 
2955         if (sk->state == TCP_CLOSING) 
2956         {
2957 
2958                 if (!sk->dead) 
2959                         sk->state_change(sk);
2960                 if (sk->rcv_ack_seq == sk->write_seq) 
2961                 {
2962                         flag |= 1;
2963                         tcp_time_wait(sk);
2964                 }
2965         }
2966 
2967         /*
2968          * I make no guarantees about the first clause in the following
2969          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2970          * what conditions "!flag" would be true.  However I think the rest
2971          * of the conditions would prevent that from causing any
2972          * unnecessary retransmission. 
2973          *   Clearly if the first packet has expired it should be 
2974          * retransmitted.  The other alternative, "flag&2 && retransmits", is
2975          * harder to explain:  You have to look carefully at how and when the
2976          * timer is set and with what timeout.  The most recent transmission always
2977          * sets the timer.  So in general if the most recent thing has timed
2978          * out, everything before it has as well.  So we want to go ahead and
2979          * retransmit some more.  If we didn't explicitly test for this
2980          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2981          * would not be true.  If you look at the pattern of timing, you can
2982          * show that rto is increased fast enough that the next packet would
2983          * almost never be retransmitted immediately.  Then you'd end up
2984          * waiting for a timeout to send each packet on the retransmission
2985          * queue.  With my implementation of the Karn sampling algorithm,
2986          * the timeout would double each time.  The net result is that it would
2987          * take a hideous amount of time to recover from a single dropped packet.
2988          * It's possible that there should also be a test for TIME_WRITE, but
2989          * I think as long as "send_head != NULL" and "retransmit" is on, we've
2990          * got to be in real retransmission mode.
2991          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2992          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2993          * As long as no further losses occur, this seems reasonable.
2994          */
2995         
2996         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2997                (((flag&2) && sk->retransmits) ||
2998                (sk->send_head->when + sk->rto < jiffies))) 
2999         {
3000                 ip_do_retransmit(sk, 1);
3001                 reset_timer(sk, TIME_WRITE, sk->rto);
3002         }
3003 
3004         return(1);
3005 }
3006 
3007 
3008 /*
3009  *      This routine handles the data.  If there is room in the buffer,
3010  *      it will have already been moved into it.  If there is no
3011  *      room, then we will just have to discard the packet.
3012  */
3013 
3014 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
3015          unsigned long saddr, unsigned short len)
3016 {
3017         struct sk_buff *skb1, *skb2;
3018         struct tcphdr *th;
3019         int dup_dumped=0;
3020         unsigned long new_seq;
3021 
3022         th = skb->h.th;
3023         skb->len = len -(th->doff*4);
3024 
3025         /* The bytes in the receive read/assembly queue has increased. Needed for the
3026            low memory discard algorithm */
3027            
3028         sk->bytes_rcv += skb->len;
3029         
3030         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3031         {
3032                 /* 
3033                  *      Don't want to keep passing ack's back and forth. 
3034                  *      (someone sent us dataless, boring frame)
3035                  */
3036                 if (!th->ack)
3037                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3038                 kfree_skb(skb, FREE_READ);
3039                 return(0);
3040         }
3041         
3042         /*
3043          *      We no longer have anyone receiving data on this connection.
3044          */
3045 
3046         if(sk->shutdown & RCV_SHUTDOWN)
3047         {
3048                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3049                 
3050                 if(after(new_seq,sk->copied_seq+1))     /* If the right edge of this frame is after the last copied byte
3051                                                            then it contains data we will never touch. We send an RST to 
3052                                                            ensure the far end knows it never got to the application */
3053                 {
3054                         sk->acked_seq = new_seq + th->fin;
3055                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3056                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3057                         tcp_statistics.TcpEstabResets++;
3058                         sk->state = TCP_CLOSE;
3059                         sk->err = EPIPE;
3060                         sk->shutdown = SHUTDOWN_MASK;
3061                         kfree_skb(skb, FREE_READ);
3062                         if (!sk->dead)
3063                                 sk->state_change(sk);
3064                         return(0);
3065                 }
3066 #if 0           
3067                 /* Discard the frame here - we've already proved its a duplicate */
3068                 
3069                 kfree_skb(skb, FREE_READ);
3070                 return(0);                              
3071 #endif          
3072         }
3073         /*
3074          *      Now we have to walk the chain, and figure out where this one
3075          *      goes into it.  This is set up so that the last packet we received
3076          *      will be the first one we look at, that way if everything comes
3077          *      in order, there will be no performance loss, and if they come
3078          *      out of order we will be able to fit things in nicely.
3079          */
3080 
3081         /* 
3082          *      This should start at the last one, and then go around forwards.
3083          */
3084 
3085         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3086         {
3087                 skb_queue_head(&sk->receive_queue,skb);
3088                 skb1= NULL;
3089         } 
3090         else
3091         {
3092                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3093                 {
3094                         if(sk->debug)
3095                         {
3096                                 printk("skb1=%p :", skb1);
3097                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3098                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3099                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3100                                                 sk->acked_seq);
3101                         }
3102                         
3103                         /*
3104                          *      Optimisation: Duplicate frame or extension of previous frame from
3105                          *      same sequence point (lost ack case).
3106                          *      The frame contains duplicate data or replaces a previous frame
3107                          *      discard the previous frame (safe as sk->inuse is set) and put
3108                          *      the new one in its place.
3109                          */
3110                          
3111                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3112                         {
3113                                 skb_append(skb1,skb);
3114                                 skb_unlink(skb1);
3115                                 kfree_skb(skb1,FREE_READ);
3116                                 dup_dumped=1;
3117                                 skb1=NULL;
3118                                 break;
3119                         }
3120                         
3121                         /*
3122                          *      Found where it fits
3123                          */
3124                          
3125                         if (after(th->seq+1, skb1->h.th->seq))
3126                         {
3127                                 skb_append(skb1,skb);
3128                                 break;
3129                         }
3130                         
3131                         /*
3132                          *      See if we've hit the start. If so insert.
3133                          */
3134                         if (skb1 == skb_peek(&sk->receive_queue))
3135                         {
3136                                 skb_queue_head(&sk->receive_queue, skb);
3137                                 break;
3138                         }
3139                 }
3140         }
3141 
3142         /*
3143          *      Figure out what the ack value for this frame is
3144          */
3145          
3146         th->ack_seq = th->seq + skb->len;
3147         if (th->syn) 
3148                 th->ack_seq++;
3149         if (th->fin)
3150                 th->ack_seq++;
3151 
3152         if (before(sk->acked_seq, sk->copied_seq)) 
3153         {
3154                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3155                 sk->acked_seq = sk->copied_seq;
3156         }
3157 
3158         /*
3159          *      Now figure out if we can ack anything.
3160          */
3161 
3162         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3163         {
3164                 if (before(th->seq, sk->acked_seq+1)) 
3165                 {
3166                         int newwindow;
3167 
3168                         if (after(th->ack_seq, sk->acked_seq)) 
3169                         {
3170                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3171                                 if (newwindow < 0)
3172                                         newwindow = 0;  
3173                                 sk->window = newwindow;
3174                                 sk->acked_seq = th->ack_seq;
3175                         }
3176                         skb->acked = 1;
3177 
3178                         /* 
3179                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3180                          */
3181 
3182                         if (skb->h.th->fin) 
3183                         {
3184                                 if (!sk->dead) 
3185                                         sk->state_change(sk);
3186                                 sk->shutdown |= RCV_SHUTDOWN;
3187                         }
3188           
3189                         for(skb2 = skb->next;
3190                             skb2 != (struct sk_buff *)&sk->receive_queue;
3191                             skb2 = skb2->next) 
3192                         {
3193                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3194                                 {
3195                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3196                                         {
3197                                                 newwindow = sk->window -
3198                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3199                                                 if (newwindow < 0)
3200                                                         newwindow = 0;  
3201                                                 sk->window = newwindow;
3202                                                 sk->acked_seq = skb2->h.th->ack_seq;
3203                                         }
3204                                         skb2->acked = 1;
3205                                         /*
3206                                          *      When we ack the fin, we turn on
3207                                          *      the RCV_SHUTDOWN flag.
3208                                          */
3209                                         if (skb2->h.th->fin) 
3210                                         {
3211                                                 sk->shutdown |= RCV_SHUTDOWN;
3212                                                 if (!sk->dead)
3213                                                         sk->state_change(sk);
3214                                         }
3215 
3216                                         /*
3217                                          *      Force an immediate ack.
3218                                          */
3219                                          
3220                                         sk->ack_backlog = sk->max_ack_backlog;
3221                                 }
3222                                 else
3223                                 {
3224                                         break;
3225                                 }
3226                         }
3227 
3228                         /*
3229                          *      This also takes care of updating the window.
3230                          *      This if statement needs to be simplified.
3231                          */
3232                         if (!sk->delay_acks ||
3233                             sk->ack_backlog >= sk->max_ack_backlog || 
3234                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3235         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3236                         }
3237                         else 
3238                         {
3239                                 sk->ack_backlog++;
3240                                 if(sk->debug)
3241                                         printk("Ack queued.\n");
3242                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3243                         }
3244                 }
3245         }
3246 
3247         /*
3248          *      If we've missed a packet, send an ack.
3249          *      Also start a timer to send another.
3250          */
3251          
3252         if (!skb->acked) 
3253         {
3254         
3255         /*
3256          *      This is important.  If we don't have much room left,
3257          *      we need to throw out a few packets so we have a good
3258          *      window.  Note that mtu is used, not mss, because mss is really
3259          *      for the send side.  He could be sending us stuff as large as mtu.
3260          */
3261                  
3262                 while (sk->prot->rspace(sk) < sk->mtu) 
3263                 {
3264                         skb1 = skb_peek(&sk->receive_queue);
3265                         if (skb1 == NULL) 
3266                         {
3267                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3268                                 break;
3269                         }
3270 
3271                         /*
3272                          *      Don't throw out something that has been acked. 
3273                          */
3274                  
3275                         if (skb1->acked) 
3276                         {
3277                                 break;
3278                         }
3279                 
3280                         skb_unlink(skb1);
3281                         kfree_skb(skb1, FREE_READ);
3282                 }
3283                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3284                 sk->ack_backlog++;
3285                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3286         }
3287         else
3288         {
3289                 /* We missed a packet.  Send an ack to try to resync things. */
3290                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3291         }
3292 
3293         /*
3294          *      Now tell the user we may have some data. 
3295          */
3296          
3297         if (!sk->dead) 
3298         {
3299                 if(sk->debug)
3300                         printk("Data wakeup.\n");
3301                 sk->data_ready(sk,0);
3302         } 
3303         return(0);
3304 }
3305 
3306 
3307 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3308 {
3309         unsigned long ptr = ntohs(th->urg_ptr);
3310 
3311         if (ptr)
3312                 ptr--;
3313         ptr += th->seq;
3314 
3315         /* ignore urgent data that we've already seen and read */
3316         if (after(sk->copied_seq+1, ptr))
3317                 return;
3318 
3319         /* do we already have a newer (or duplicate) urgent pointer? */
3320         if (sk->urg_data && !after(ptr, sk->urg_seq))
3321                 return;
3322 
3323         /* tell the world about our new urgent pointer */
3324         if (sk->proc != 0) {
3325                 if (sk->proc > 0) {
3326                         kill_proc(sk->proc, SIGURG, 1);
3327                 } else {
3328                         kill_pg(-sk->proc, SIGURG, 1);
3329                 }
3330         }
3331         sk->urg_data = URG_NOTYET;
3332         sk->urg_seq = ptr;
3333 }
3334 
3335 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3336         unsigned long saddr, unsigned long len)
3337 {
3338         unsigned long ptr;
3339 
3340         /* check if we get a new urgent pointer */
3341         if (th->urg)
3342                 tcp_check_urg(sk,th);
3343 
3344         /* do we wait for any urgent data? */
3345         if (sk->urg_data != URG_NOTYET)
3346                 return 0;
3347 
3348         /* is the urgent pointer pointing into this packet? */
3349         ptr = sk->urg_seq - th->seq + th->doff*4;
3350         if (ptr >= len)
3351                 return 0;
3352 
3353         /* ok, got the correct packet, update info */
3354         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3355         if (!sk->dead)
3356                 sk->data_ready(sk,0);
3357         return 0;
3358 }
3359 
3360 
3361 /*
3362  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3363  *
3364  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3365  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3366  *  TIME-WAIT)
3367  *
3368  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3369  *  close and we go into CLOSING (and later onto TIME-WAIT)
3370  *
3371  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3372  *
3373  */
3374  
3375 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3376          unsigned long saddr, struct device *dev)
3377 {
3378         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3379 
3380         if (!sk->dead) 
3381         {
3382                 sk->state_change(sk);
3383         }
3384 
3385         switch(sk->state) 
3386         {
3387                 case TCP_SYN_RECV:
3388                 case TCP_SYN_SENT:
3389                 case TCP_ESTABLISHED:
3390                         /*
3391                          * move to CLOSE_WAIT, tcp_data() already handled
3392                          * sending the ack.
3393                          */
3394                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3395                         /*sk->fin_seq = th->seq+1;*/
3396                         tcp_statistics.TcpCurrEstab--;
3397                         sk->state = TCP_CLOSE_WAIT;
3398                         if (th->rst)
3399                                 sk->shutdown = SHUTDOWN_MASK;
3400                         break;
3401 
3402                 case TCP_CLOSE_WAIT:
3403                 case TCP_CLOSING:
3404                         /*
3405                          * received a retransmission of the FIN, do
3406                          * nothing.
3407                          */
3408                         break;
3409                 case TCP_TIME_WAIT:
3410                         /*
3411                          * received a retransmission of the FIN,
3412                          * restart the TIME_WAIT timer.
3413                          */
3414                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3415                         return(0);
3416                 case TCP_FIN_WAIT1:
3417                         /*
3418                          * This case occurs when a simultaneous close
3419                          * happens, we must ack the received FIN and
3420                          * enter the CLOSING state.
3421                          *
3422                          * XXX timeout not set properly
3423                          */
3424 
3425                         tcp_statistics.TcpCurrEstab--;
3426                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3427                         /*sk->fin_seq = th->seq+1;*/
3428                         sk->state = TCP_CLOSING;
3429                         break;
3430                 case TCP_FIN_WAIT2:
3431                         /*
3432                          * received a FIN -- send ACK and enter TIME_WAIT
3433                          */
3434                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3435                         /*sk->fin_seq = th->seq+1;*/
3436                         sk->state = TCP_TIME_WAIT;
3437                         break;
3438                 case TCP_CLOSE:
3439                         /*
3440                          * already in CLOSE
3441                          */
3442                         break;
3443                 default:
3444                         sk->state = TCP_LAST_ACK;
3445         
3446                         /* Start the timers. */
3447                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3448                         return(0);
3449         }
3450         sk->ack_backlog++;
3451 
3452         return(0);
3453 }
3454 
3455 
3456 /* This will accept the next outstanding connection. */
3457 static struct sock *
3458 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3459 {
3460         struct sock *newsk;
3461         struct sk_buff *skb;
3462   
3463   /*
3464    * We need to make sure that this socket is listening,
3465    * and that it has something pending.
3466    */
3467 
3468         if (sk->state != TCP_LISTEN) 
3469         {
3470                 sk->err = EINVAL;
3471                 return(NULL); 
3472         }
3473 
3474         /* Avoid the race. */
3475         cli();
3476         sk->inuse = 1;
3477 
3478         while((skb = skb_dequeue(&sk->receive_queue)) == NULL) 
3479         {
3480                 if (flags & O_NONBLOCK) 
3481                 {
3482                         sti();
3483                         release_sock(sk);
3484                         sk->err = EAGAIN;
3485                         return(NULL);
3486                 }
3487 
3488                 release_sock(sk);
3489                 interruptible_sleep_on(sk->sleep);
3490                 if (current->signal & ~current->blocked) 
3491                 {
3492                         sti();
3493                         sk->err = ERESTARTSYS;
3494                         return(NULL);
3495                 }
3496                 sk->inuse = 1;
3497         }
3498         sti();
3499 
3500         /*
3501          *      Now all we need to do is return skb->sk. 
3502          */
3503 
3504         newsk = skb->sk;
3505 
3506         kfree_skb(skb, FREE_READ);
3507         sk->ack_backlog--;
3508         release_sock(sk);
3509         return(newsk);
3510 }
3511 
3512 
3513 /*
3514  *      This will initiate an outgoing connection. 
3515  */
3516  
3517 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3518 {
3519         struct sk_buff *buff;
3520         struct device *dev=NULL;
3521         unsigned char *ptr;
3522         int tmp;
3523         struct tcphdr *t1;
3524         struct rtable *rt;
3525 
3526         if (sk->state != TCP_CLOSE) 
3527                 return(-EISCONN);
3528 
3529         if (addr_len < 8) 
3530                 return(-EINVAL);
3531 
3532         if (usin->sin_family && usin->sin_family != AF_INET) 
3533                 return(-EAFNOSUPPORT);
3534 
3535         /*
3536          *      connect() to INADDR_ANY means loopback (BSD'ism).
3537          */
3538         
3539         if(usin->sin_addr.s_addr==INADDR_ANY)
3540                 usin->sin_addr.s_addr=ip_my_addr();
3541                   
3542         /*
3543          *      Don't want a TCP connection going to a broadcast address 
3544          */
3545 
3546         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3547         { 
3548                 return -ENETUNREACH;
3549         }
3550   
3551         /*
3552          *      Connect back to the same socket: Blows up so disallow it 
3553          */
3554 
3555         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3556                 return -EBUSY;
3557 
3558         sk->inuse = 1;
3559         sk->daddr = usin->sin_addr.s_addr;
3560         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3561         sk->window_seq = sk->write_seq;
3562         sk->rcv_ack_seq = sk->write_seq -1;
3563         sk->err = 0;
3564         sk->dummy_th.dest = usin->sin_port;
3565         release_sock(sk);
3566 
3567         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3568         if (buff == NULL) 
3569         {
3570                 return(-ENOMEM);
3571         }
3572         sk->inuse = 1;
3573         buff->len = 24;
3574         buff->sk = sk;
3575         buff->free = 1;
3576         buff->localroute = sk->localroute;
3577         
3578         t1 = (struct tcphdr *) buff->data;
3579 
3580         /*
3581          *      Put in the IP header and routing stuff. 
3582          */
3583          
3584         rt=ip_rt_route(sk->daddr, NULL, NULL);
3585         
3586 
3587         /*
3588          *      We need to build the routing stuff from the things saved in skb. 
3589          */
3590 
3591         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3592                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3593         if (tmp < 0) 
3594         {
3595                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3596                 release_sock(sk);
3597                 return(-ENETUNREACH);
3598         }
3599 
3600         buff->len += tmp;
3601         t1 = (struct tcphdr *)((char *)t1 +tmp);
3602 
3603         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3604         t1->seq = ntohl(sk->write_seq++);
3605         sk->sent_seq = sk->write_seq;
3606         buff->h.seq = sk->write_seq;
3607         t1->ack = 0;
3608         t1->window = 2;
3609         t1->res1=0;
3610         t1->res2=0;
3611         t1->rst = 0;
3612         t1->urg = 0;
3613         t1->psh = 0;
3614         t1->syn = 1;
3615         t1->urg_ptr = 0;
3616         t1->doff = 6;
3617         /* use 512 or whatever user asked for */
3618         
3619         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3620                 sk->window_clamp=rt->rt_window;
3621         else
3622                 sk->window_clamp=0;
3623 
3624         if (sk->user_mss)
3625                 sk->mtu = sk->user_mss;
3626         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3627                 sk->mtu = rt->rt_mss;
3628         else 
3629         {
3630 #ifdef CONFIG_INET_SNARL
3631                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3632 #else
3633                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3634 #endif
3635                         sk->mtu = 576 - HEADER_SIZE;
3636                 else
3637                         sk->mtu = MAX_WINDOW;
3638         }
3639         /*
3640          *      but not bigger than device MTU 
3641          */
3642 
3643         if(sk->mtu <32)
3644                 sk->mtu = 32;   /* Sanity limit */
3645                 
3646         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3647         
3648         /*
3649          *      Put in the TCP options to say MTU. 
3650          */
3651 
3652         ptr = (unsigned char *)(t1+1);
3653         ptr[0] = 2;
3654         ptr[1] = 4;
3655         ptr[2] = (sk->mtu) >> 8;
3656         ptr[3] = (sk->mtu) & 0xff;
3657         tcp_send_check(t1, sk->saddr, sk->daddr,
3658                   sizeof(struct tcphdr) + 4, sk);
3659 
3660         /*
3661          *      This must go first otherwise a really quick response will get reset. 
3662          */
3663 
3664         sk->state = TCP_SYN_SENT;
3665 /*      sk->rtt = TCP_CONNECT_TIME;*/
3666         sk->rto = TCP_TIMEOUT_INIT;
3667         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3668         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3669 
3670         sk->prot->queue_xmit(sk, dev, buff, 0);  
3671         tcp_statistics.TcpActiveOpens++;
3672         tcp_statistics.TcpOutSegs++;
3673   
3674         release_sock(sk);
3675         return(0);
3676 }
3677 
3678 
3679 /* This functions checks to see if the tcp header is actually acceptable. */
3680 static int
3681 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3682              struct options *opt, unsigned long saddr, struct device *dev)
3683 {
3684         unsigned long next_seq;
3685 
3686         next_seq = len - 4*th->doff;
3687         if (th->fin)
3688                 next_seq++;
3689         /* if we have a zero window, we can't have any data in the packet.. */
3690         if (next_seq && !sk->window)
3691                 goto ignore_it;
3692         next_seq += th->seq;
3693 
3694         /*
3695          * This isn't quite right.  sk->acked_seq could be more recent
3696          * than sk->window.  This is however close enough.  We will accept
3697          * slightly more packets than we should, but it should not cause
3698          * problems unless someone is trying to forge packets.
3699          */
3700 
3701         /* have we already seen all of this packet? */
3702         if (!after(next_seq+1, sk->acked_seq))
3703                 goto ignore_it;
3704         /* or does it start beyond the window? */
3705         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3706                 goto ignore_it;
3707 
3708         /* ok, at least part of this packet would seem interesting.. */
3709         return 1;
3710 
3711 ignore_it:
3712         if (th->rst)
3713                 return 0;
3714 
3715         /*
3716          *      Send a reset if we get something not ours and we are
3717          *      unsynchronized. Note: We don't do anything to our end. We
3718          *      are just killing the bogus remote connection then we will
3719          *      connect again and it will work (with luck).
3720          */
3721          
3722         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3723                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3724                 return 1;
3725         }
3726 
3727         /* Try to resync things. */
3728         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3729         return 0;
3730 }
3731 
3732 
3733 #ifdef TCP_FASTPATH
3734 /*
3735  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3736  *      Yes if
3737  *      a) The queue is empty
3738  *      b) The last frame on the queue has the acked flag set
3739  */
3740 
3741 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3742 {
3743         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3744         if(skb==NULL || sk->receive_queue.prev->acked)
3745                 return 1;
3746 }
3747 
3748 #endif
3749 
3750 int
3751 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3752         unsigned long daddr, unsigned short len,
3753         unsigned long saddr, int redo, struct inet_protocol * protocol)
3754 {
3755         struct tcphdr *th;
3756         struct sock *sk;
3757 
3758         if (!skb) 
3759         {
3760                 return(0);
3761         }
3762 
3763         if (!dev) 
3764         {
3765                 return(0);
3766         }
3767   
3768         tcp_statistics.TcpInSegs++;
3769   
3770         if(skb->pkt_type!=PACKET_HOST)
3771         {
3772                 kfree_skb(skb,FREE_READ);
3773                 return(0);
3774         }
3775   
3776         th = skb->h.th;
3777 
3778         /*
3779          *      Find the socket.
3780          */
3781 
3782         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3783 
3784         /*
3785          *      If this socket has got a reset its to all intents and purposes 
3786          *      really dead 
3787          */
3788          
3789         if (sk!=NULL && sk->zapped)
3790                 sk=NULL;
3791 
3792         if (!redo) 
3793         {
3794                 if (tcp_check(th, len, saddr, daddr )) 
3795                 {
3796                         skb->sk = NULL;
3797                         kfree_skb(skb,FREE_READ);
3798                         /*
3799                          * We don't release the socket because it was
3800                          * never marked in use.
3801                          */
3802                         return(0);
3803                 }
3804                 th->seq = ntohl(th->seq);
3805 
3806                 /* See if we know about the socket. */
3807                 if (sk == NULL) 
3808                 {
3809                         if (!th->rst)
3810                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3811                         skb->sk = NULL;
3812                         kfree_skb(skb, FREE_READ);
3813                         return(0);
3814                 }
3815 
3816                 skb->len = len;
3817                 skb->sk = sk;
3818                 skb->acked = 0;
3819                 skb->used = 0;
3820                 skb->free = 0;
3821                 skb->saddr = daddr;
3822                 skb->daddr = saddr;
3823         
3824                 /* We may need to add it to the backlog here. */
3825                 cli();
3826                 if (sk->inuse) 
3827                 {
3828                         skb_queue_head(&sk->back_log, skb);
3829                         sti();
3830                         return(0);
3831                 }
3832                 sk->inuse = 1;
3833                 sti();
3834         }
3835         else
3836         {
3837                 if (!sk) 
3838                 {
3839                         return(0);
3840                 }
3841         }
3842 
3843 
3844         if (!sk->prot) 
3845         {
3846                 return(0);
3847         }
3848 
3849 
3850         /*
3851          *      Charge the memory to the socket. 
3852          */
3853          
3854         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3855         {
3856                 skb->sk = NULL;
3857                 kfree_skb(skb, FREE_READ);
3858                 release_sock(sk);
3859                 return(0);
3860         }
3861 
3862         sk->rmem_alloc += skb->mem_len;
3863 
3864 #ifdef TCP_FASTPATH
3865 /*
3866  *      Incoming data stream fastpath. 
3867  *
3868  *      We try to optimise two things.
3869  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3870  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3871  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3872  *
3873  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3874  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3875  *      speed although further optimizing here is possible.
3876  */
3877  
3878         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3879         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3880         {       
3881                 /* Packets in order. Fits window */
3882                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3883                 {
3884                         /* Ack is harder */
3885                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3886                         {
3887                                 kfree_skb(skb, FREE_READ);
3888                                 release_sock(sk);
3889                                 return 0;
3890                         }
3891                         /*
3892                          *      Set up variables
3893                          */
3894                         skb->len -= (th->doff *4);
3895                         sk->bytes_rcv += skb->len;
3896                         tcp_rx_hit2++;
3897                         if(skb->len)
3898                         {
3899                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3900                                 if(sk->window >= skb->len)
3901                                         sk->window-=skb->len;                   /* We know its effect on the window */
3902                                 else
3903                                         sk->window=0;
3904                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3905                                 skb->acked=1;                           /* Guaranteed true */
3906                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3907                                         sk->bytes_rcv > sk->max_unacked)
3908                                 {
3909                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3910                                 }
3911                                 else
3912                                 {
3913                                         sk->ack_backlog++;
3914                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3915                                 }
3916                                 if(!sk->dead)
3917                                         sk->data_ready(sk,0);
3918                                 release_sock(sk);
3919                                 return 0;
3920                         }
3921                 }
3922                 /*
3923                  *      More generic case of arriving data stream in ESTABLISHED
3924                  */
3925                 tcp_rx_hit1++;
3926                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3927                 {
3928                         kfree_skb(skb, FREE_READ);
3929                         release_sock(sk);
3930                         return 0;
3931                 }
3932                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3933                 {
3934                         kfree_skb(skb, FREE_READ);
3935                         release_sock(sk);
3936                         return 0;
3937                 }
3938                 if(tcp_data(skb, sk, saddr, len))
3939                         kfree_skb(skb, FREE_READ);
3940                 release_sock(sk);
3941                 return 0;
3942         }
3943         tcp_rx_miss++;
3944 #endif  
3945 
3946         /*
3947          *      Now deal with all cases.
3948          */
3949          
3950         switch(sk->state) 
3951         {
3952         
3953                 /*
3954                  * This should close the system down if it's waiting
3955                  * for an ack that is never going to be sent.
3956                  */
3957                 case TCP_LAST_ACK:
3958                         if (th->rst) 
3959                         {
3960                                 sk->zapped=1;
3961                                 sk->err = ECONNRESET;
3962                                 sk->state = TCP_CLOSE;
3963                                 sk->shutdown = SHUTDOWN_MASK;
3964                                 if (!sk->dead) 
3965                                 {
3966                                         sk->state_change(sk);
3967                                 }
3968                                 kfree_skb(skb, FREE_READ);
3969                                 release_sock(sk);
3970                                 return(0);
3971                         }
3972 
3973                 case TCP_ESTABLISHED:
3974                 case TCP_CLOSE_WAIT:
3975                 case TCP_CLOSING:
3976                 case TCP_FIN_WAIT1:
3977                 case TCP_FIN_WAIT2:
3978                 case TCP_TIME_WAIT:
3979                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3980                         {
3981                                 kfree_skb(skb, FREE_READ);
3982                                 release_sock(sk);
3983                                 return(0);
3984                         }
3985 
3986                         if (th->rst) 
3987                         {
3988                                 tcp_statistics.TcpEstabResets++;
3989                                 tcp_statistics.TcpCurrEstab--;
3990                                 sk->zapped=1;
3991                                 /* This means the thing should really be closed. */
3992                                 sk->err = ECONNRESET;
3993                                 if (sk->state == TCP_CLOSE_WAIT) 
3994                                 {
3995                                         sk->err = EPIPE;
3996                                 }
3997         
3998                                 /*
3999                                  * A reset with a fin just means that
4000                                  * the data was not all read.
4001                                  */
4002                                 sk->state = TCP_CLOSE;
4003                                 sk->shutdown = SHUTDOWN_MASK;
4004                                 if (!sk->dead) 
4005                                 {
4006                                         sk->state_change(sk);
4007                                 }
4008                                 kfree_skb(skb, FREE_READ);
4009                                 release_sock(sk);
4010                                 return(0);
4011                         }
4012                         if (th->syn) 
4013                         {
4014                                 tcp_statistics.TcpCurrEstab--;
4015                                 tcp_statistics.TcpEstabResets++;
4016                                 sk->err = ECONNRESET;
4017                                 sk->state = TCP_CLOSE;
4018                                 sk->shutdown = SHUTDOWN_MASK;
4019                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4020                                 if (!sk->dead) {
4021                                         sk->state_change(sk);
4022                                 }
4023                                 kfree_skb(skb, FREE_READ);
4024                                 release_sock(sk);
4025                                 return(0);
4026                         }
4027         
4028                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4029                                 kfree_skb(skb, FREE_READ);
4030                                 release_sock(sk);
4031                                 return(0);
4032                         }
4033         
4034                         if (tcp_urg(sk, th, saddr, len)) {
4035                                 kfree_skb(skb, FREE_READ);
4036                                 release_sock(sk);
4037                                 return(0);
4038                         }
4039 
4040         
4041                         if (tcp_data(skb, sk, saddr, len)) {
4042                                 kfree_skb(skb, FREE_READ);
4043                                 release_sock(sk);
4044                                 return(0);
4045                         }       
4046 
4047                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4048                                 kfree_skb(skb, FREE_READ);
4049                                 release_sock(sk);
4050                                 return(0);
4051                         }
4052         
4053                         release_sock(sk);
4054                         return(0);
4055                 
4056                 case TCP_CLOSE:
4057                         if (sk->dead || sk->daddr) {
4058                                 kfree_skb(skb, FREE_READ);
4059                                         release_sock(sk);
4060                                 return(0);
4061                         }
4062         
4063                         if (!th->rst) {
4064                                 if (!th->ack)
4065                                         th->ack_seq = 0;
4066                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4067                         }
4068                         kfree_skb(skb, FREE_READ);
4069                         release_sock(sk);
4070                                 return(0);
4071         
4072                 case TCP_LISTEN:
4073                         if (th->rst) {
4074                                 kfree_skb(skb, FREE_READ);
4075                                 release_sock(sk);
4076                                 return(0);
4077                         }
4078                         if (th->ack) {
4079                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4080                                 kfree_skb(skb, FREE_READ);
4081                                 release_sock(sk);
4082                                 return(0);
4083                         }
4084         
4085                         if (th->syn) 
4086                         {
4087                                 /*
4088                                  * Now we just put the whole thing including
4089                                  * the header and saddr, and protocol pointer
4090                                  * into the buffer.  We can't respond until the
4091                                  * user tells us to accept the connection.
4092                                  */
4093                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4094                                 release_sock(sk);
4095                                 return(0);
4096                         }
4097 
4098                         kfree_skb(skb, FREE_READ);
4099                         release_sock(sk);
4100                         return(0);
4101 
4102                 case TCP_SYN_RECV:
4103                         if (th->syn) {
4104                                 /* Probably a retransmitted syn */
4105                                 kfree_skb(skb, FREE_READ);
4106                                 release_sock(sk);
4107                                 return(0);
4108                         }
4109         
4110         
4111                 default:
4112                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4113                         {
4114                                 kfree_skb(skb, FREE_READ);
4115                                 release_sock(sk);
4116                                 return(0);
4117                         }
4118         
4119                 case TCP_SYN_SENT:
4120                         if (th->rst) 
4121                         {
4122                                 tcp_statistics.TcpAttemptFails++;
4123                                 sk->err = ECONNREFUSED;
4124                                 sk->state = TCP_CLOSE;
4125                                 sk->shutdown = SHUTDOWN_MASK;
4126                                 sk->zapped = 1;
4127                                 if (!sk->dead) 
4128                                 {
4129                                         sk->state_change(sk);
4130                                 }
4131                                 kfree_skb(skb, FREE_READ);
4132                                 release_sock(sk);
4133                                 return(0);
4134                         }
4135                         if (!th->ack) 
4136                         {
4137                                 if (th->syn) 
4138                                 {
4139                                         sk->state = TCP_SYN_RECV;
4140                                 }
4141                                 kfree_skb(skb, FREE_READ);
4142                                 release_sock(sk);
4143                                 return(0);
4144                         }
4145         
4146                         switch(sk->state) 
4147                         {
4148                                 case TCP_SYN_SENT:
4149                                         if (!tcp_ack(sk, th, saddr, len)) 
4150                                         {
4151                                                 tcp_statistics.TcpAttemptFails++;
4152                                                 tcp_reset(daddr, saddr, th,
4153                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4154                                                 kfree_skb(skb, FREE_READ);
4155                                                         release_sock(sk);
4156                                                 return(0);
4157                                         }
4158         
4159                                         /*
4160                                          * If the syn bit is also set, switch to
4161                                          * tcp_syn_recv, and then to established.
4162                                          */
4163                                         if (!th->syn) 
4164                                         {
4165                                                 kfree_skb(skb, FREE_READ);
4166                                                 release_sock(sk);
4167                                                 return(0);
4168                                         }
4169         
4170                                         /* Ack the syn and fall through. */
4171                                         sk->acked_seq = th->seq+1;
4172                                         sk->fin_seq = th->seq;
4173                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4174                                                 sk, th, sk->daddr);
4175                 
4176                                 case TCP_SYN_RECV:
4177                                         if (!tcp_ack(sk, th, saddr, len)) 
4178                                         {
4179                                                 tcp_statistics.TcpAttemptFails++;
4180                                                 tcp_reset(daddr, saddr, th,
4181                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4182                                                 kfree_skb(skb, FREE_READ);
4183                                                 release_sock(sk);
4184                                                 return(0);
4185                                         }
4186         
4187                                         tcp_statistics.TcpCurrEstab++;
4188                                         sk->state = TCP_ESTABLISHED;
4189         
4190                                         /*
4191                                          *      Now we need to finish filling out
4192                                          *      some of the tcp header.
4193                                          * 
4194                                          *      We need to check for mtu info. 
4195                                          */
4196                                         tcp_options(sk, th);
4197                                         sk->dummy_th.dest = th->source;
4198                                         sk->copied_seq = sk->acked_seq-1;
4199                                         if (!sk->dead) 
4200                                         {
4201                                                 sk->state_change(sk);
4202                                         }
4203         
4204                                         /*
4205                                          * We've already processed his first
4206                                          * ack.  In just about all cases that
4207                                          * will have set max_window.  This is
4208                                          * to protect us against the possibility
4209                                          * that the initial window he sent was 0.
4210                                          * This must occur after tcp_options, which
4211                                          * sets sk->mtu.
4212                                          */
4213                                         if (sk->max_window == 0) 
4214                                         {
4215                                                 sk->max_window = 32;
4216                                                 sk->mss = min(sk->max_window, sk->mtu);
4217                                         }
4218 
4219                                         /*
4220                                          * Now process the rest like we were
4221                                          * already in the established state.
4222                                          */
4223                                         if (th->urg) 
4224                                         {
4225                                                 if (tcp_urg(sk, th, saddr, len)) 
4226                                                 { 
4227                                                         kfree_skb(skb, FREE_READ);
4228                                                         release_sock(sk);
4229                                                         return(0);
4230                                                 }
4231                                         }
4232                                         if (tcp_data(skb, sk, saddr, len))
4233                                                 kfree_skb(skb, FREE_READ);
4234 
4235                                         if (th->fin)
4236                                                 tcp_fin(skb, sk, th, saddr, dev);
4237                                         release_sock(sk);
4238                                         return(0);
4239                         }
4240         
4241                         if (th->urg) 
4242                         {
4243                                 if (tcp_urg(sk, th, saddr, len)) 
4244                                 {
4245                                         kfree_skb(skb, FREE_READ);
4246                                         release_sock(sk);
4247                                         return(0);
4248                                 }
4249                         }
4250                         if (tcp_data(skb, sk, saddr, len)) 
4251                         {
4252                                 kfree_skb(skb, FREE_READ);
4253                                 release_sock(sk);
4254                                 return(0);
4255                         }
4256         
4257                         if (!th->fin) 
4258                         {
4259                                 release_sock(sk);
4260                                 return(0);
4261                         }
4262                         tcp_fin(skb, sk, th, saddr, dev);
4263                         release_sock(sk);
4264                         return(0);
4265         }
4266 }
4267 
4268 
4269 /*
4270  * This routine sends a packet with an out of date sequence
4271  * number. It assumes the other end will try to ack it.
4272  */
4273 
4274 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4275 {
4276         struct sk_buff *buff;
4277         struct tcphdr *t1;
4278         struct device *dev=NULL;
4279         int tmp;
4280 
4281         if (sk->zapped)
4282                 return; /* After a valid reset we can send no more */
4283 
4284         /*
4285          * Write data can still be transmitted/retransmitted in the
4286          * following states.  If any other state is encountered, return.
4287          */
4288 
4289         if (sk->state != TCP_ESTABLISHED && 
4290             sk->state != TCP_CLOSE_WAIT &&
4291             sk->state != TCP_FIN_WAIT1 && 
4292             sk->state != TCP_LAST_ACK &&
4293             sk->state != TCP_CLOSING
4294         ) {
4295                 return;
4296         }
4297 
4298         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4299         if (buff == NULL) 
4300                 return;
4301 
4302         buff->len = sizeof(struct tcphdr);
4303         buff->free = 1;
4304         buff->sk = sk;
4305         buff->localroute = sk->localroute;
4306 
4307         t1 = (struct tcphdr *) buff->data;
4308 
4309         /* Put in the IP header and routing stuff. */
4310         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4311                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4312         if (tmp < 0) 
4313         {
4314                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4315                 return;
4316         }
4317 
4318         buff->len += tmp;
4319         t1 = (struct tcphdr *)((char *)t1 +tmp);
4320 
4321         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4322 
4323         /*
4324          * Use a previous sequence.
4325          * This should cause the other end to send an ack.
4326          */
4327         t1->seq = htonl(sk->sent_seq-1);
4328         t1->ack = 1; 
4329         t1->res1= 0;
4330         t1->res2= 0;
4331         t1->rst = 0;
4332         t1->urg = 0;
4333         t1->psh = 0;
4334         t1->fin = 0;
4335         t1->syn = 0;
4336         t1->ack_seq = ntohl(sk->acked_seq);
4337         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
4338         t1->doff = sizeof(*t1)/4;
4339         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4340 
4341          /*     Send it and free it.
4342           *     This will prevent the timer from automatically being restarted.
4343           */
4344         sk->prot->queue_xmit(sk, dev, buff, 1);
4345         tcp_statistics.TcpOutSegs++;
4346 }
4347 
4348 void
4349 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4350 {
4351         if (sk->zapped)
4352                 return;         /* After a valid reset we can send no more */
4353 
4354         tcp_write_wakeup(sk);
4355 
4356         sk->backoff++;
4357         sk->rto = min(sk->rto << 1, 120*HZ);
4358         reset_timer (sk, TIME_PROBE0, sk->rto);
4359         sk->retransmits++;
4360         sk->prot->retransmits ++;
4361 }
4362 
4363 /*
4364  *      Socket option code for TCP. 
4365  */
4366   
4367 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4368 {
4369         int val,err;
4370 
4371         if(level!=SOL_TCP)
4372                 return ip_setsockopt(sk,level,optname,optval,optlen);
4373 
4374         if (optval == NULL) 
4375                 return(-EINVAL);
4376 
4377         err=verify_area(VERIFY_READ, optval, sizeof(int));
4378         if(err)
4379                 return err;
4380         
4381         val = get_fs_long((unsigned long *)optval);
4382 
4383         switch(optname)
4384         {
4385                 case TCP_MAXSEG:
4386 /*                      if(val<200||val>2048 || val>sk->mtu) */
4387 /*
4388  * values greater than interface MTU won't take effect.  however at
4389  * the point when this call is done we typically don't yet know
4390  * which interface is going to be used
4391  */
4392                         if(val<1||val>MAX_WINDOW)
4393                                 return -EINVAL;
4394                         sk->user_mss=val;
4395                         return 0;
4396                 case TCP_NODELAY:
4397                         sk->nonagle=(val==0)?0:1;
4398                         return 0;
4399                 default:
4400                         return(-ENOPROTOOPT);
4401         }
4402 }
4403 
4404 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4405 {
4406         int val,err;
4407 
4408         if(level!=SOL_TCP)
4409                 return ip_getsockopt(sk,level,optname,optval,optlen);
4410                         
4411         switch(optname)
4412         {
4413                 case TCP_MAXSEG:
4414                         val=sk->user_mss;
4415                         break;
4416                 case TCP_NODELAY:
4417                         val=sk->nonagle;        /* Until Johannes stuff is in */
4418                         break;
4419                 default:
4420                         return(-ENOPROTOOPT);
4421         }
4422         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4423         if(err)
4424                 return err;
4425         put_fs_long(sizeof(int),(unsigned long *) optlen);
4426 
4427         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4428         if(err)
4429                 return err;
4430         put_fs_long(val,(unsigned long *)optval);
4431 
4432         return(0);
4433 }       
4434 
4435 
4436 struct proto tcp_prot = {
4437         sock_wmalloc,
4438         sock_rmalloc,
4439         sock_wfree,
4440         sock_rfree,
4441         sock_rspace,
4442         sock_wspace,
4443         tcp_close,
4444         tcp_read,
4445         tcp_write,
4446         tcp_sendto,
4447         tcp_recvfrom,
4448         ip_build_header,
4449         tcp_connect,
4450         tcp_accept,
4451         ip_queue_xmit,
4452         tcp_retransmit,
4453         tcp_write_wakeup,
4454         tcp_read_wakeup,
4455         tcp_rcv,
4456         tcp_select,
4457         tcp_ioctl,
4458         NULL,
4459         tcp_shutdown,
4460         tcp_setsockopt,
4461         tcp_getsockopt,
4462         128,
4463         0,
4464         {NULL,},
4465         "TCP"
4466 };

/* [previous][next][first][last][top][bottom][index][help] */