root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_select_window
  3. tcp_time_wait
  4. tcp_retransmit
  5. tcp_err
  6. tcp_readable
  7. tcp_select
  8. tcp_ioctl
  9. tcp_check
  10. tcp_send_check
  11. tcp_send_skb
  12. tcp_dequeue_partial
  13. tcp_send_partial
  14. tcp_enqueue_partial
  15. tcp_send_ack
  16. tcp_build_header
  17. tcp_write
  18. tcp_sendto
  19. tcp_read_wakeup
  20. cleanup_rbuf
  21. tcp_read_urg
  22. tcp_read
  23. tcp_shutdown
  24. tcp_recvfrom
  25. tcp_reset
  26. tcp_options
  27. default_mask
  28. tcp_conn_request
  29. tcp_close
  30. tcp_write_xmit
  31. sort_send
  32. tcp_ack
  33. tcp_data
  34. tcp_check_urg
  35. tcp_urg
  36. tcp_fin
  37. tcp_accept
  38. tcp_connect
  39. tcp_sequence
  40. tcp_clean_end
  41. tcp_rcv
  42. tcp_write_wakeup
  43. tcp_send_probe0
  44. tcp_setsockopt
  45. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *
  83  *
  84  * To Fix:
  85  *                      Possibly a problem with accept(). BSD accept never fails after
  86  *              it causes a select. Linux can - given the official select semantics I
  87  *              feel that _really_ its the BSD network programs that are bust (notably
  88  *              inetd, which hangs occasionally because of this).
  89  *
  90  *                      Fast path the code. Two things here - fix the window calculation
  91  *              so it doesn't iterate over the queue, also spot packets with no funny
  92  *              options arriving in order and process directly.
  93  *
  94  *              This program is free software; you can redistribute it and/or
  95  *              modify it under the terms of the GNU General Public License
  96  *              as published by the Free Software Foundation; either version
  97  *              2 of the License, or(at your option) any later version.
  98  *
  99  * Description of States:
 100  *
 101  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 102  *
 103  *      TCP_SYN_RECV            received a connection request, sent ack,
 104  *                              waiting for final ack in three-way handshake.
 105  *
 106  *      TCP_ESTABLISHED         connection established
 107  *
 108  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 109  *                              transmission of remaining buffered data
 110  *
 111  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 112  *                              to shutdown
 113  *
 114  *      TCP_CLOSING             both sides have shutdown but we still have
 115  *                              data we have to finish sending
 116  *
 117  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 118  *                              closed, can only be entered from FIN_WAIT2
 119  *                              or CLOSING.  Required because the other end
 120  *                              may not have gotten our last ACK causing it
 121  *                              to retransmit the data packet (which we ignore)
 122  *
 123  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 124  *                              us to finish writing our data and to shutdown
 125  *                              (we have to close() to move on to LAST_ACK)
 126  *
 127  *      TCP_LAST_ACK            our side has shutdown after remote has
 128  *                              shutdown.  There may still be data in our
 129  *                              buffer that we have to finish sending
 130  *              
 131  *      TCP_CLOSED              socket is finished
 132  */
 133 #include <linux/types.h>
 134 #include <linux/sched.h>
 135 #include <linux/mm.h>
 136 #include <linux/string.h>
 137 #include <linux/socket.h>
 138 #include <linux/sockios.h>
 139 #include <linux/termios.h>
 140 #include <linux/in.h>
 141 #include <linux/fcntl.h>
 142 #include <linux/inet.h>
 143 #include <linux/netdevice.h>
 144 #include "snmp.h"
 145 #include "ip.h"
 146 #include "protocol.h"
 147 #include "icmp.h"
 148 #include "tcp.h"
 149 #include <linux/skbuff.h>
 150 #include "sock.h"
 151 #include "route.h"
 152 #include <linux/errno.h>
 153 #include <linux/timer.h>
 154 #include <asm/system.h>
 155 #include <asm/segment.h>
 156 #include <linux/mm.h>
 157 
 158 #undef TCP_FASTPATH      /* forced off here, so the #ifdef TCP_FASTPATH counters below are compiled out */
 159 
 160 #define SEQ_TICK 3       /* NOTE(review): not referenced in the part of the file visible here - confirm its use */
 161 unsigned long seq_offset;        /* NOTE(review): not referenced in the part of the file visible here - confirm its use */
 162 struct tcp_mib  tcp_statistics;  /* TCP counters; the code below bumps e.g. TcpAttemptFails and TcpOutSegs */
 163 
 164 #ifdef TCP_FASTPATH
      /* Counters that exist only when TCP_FASTPATH is defined. */
 165 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 166 #endif
 167 
 168 
 169 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 170 {
 171         if (a < b) 
 172                 return(a);
 173         return(b);
 174 }
 175 
 176 
 177 /* This routine picks a TCP windows for a socket based on
 178    the following constraints
 179    
 180    1. The window can never be shrunk once it is offered (RFC 793)
 181    2. We limit memory per socket
 182    
 183    For now we use NET2E3's heuristic of offering half the memory
 184    we have handy. All is not as bad as this seems however because
 185    of two things. Firstly we will bin packets even within the window
 186    in order to get the data we are waiting for into the memory limit.
 187    Secondly we bin common duplicate forms at receive time
 188    
 189    Better heuristics welcome
 190 */
 191    
 192 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 193 {
 194         int new_window = sk->prot->rspace(sk);
 195         
 196         if(sk->window_clamp)
 197                 new_window=min(sk->window_clamp,new_window);
 198 /*
 199  * two things are going on here.  First, we don't ever offer a
 200  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 201  * receiver side of SWS as specified in RFC1122.
 202  * Second, we always give them at least the window they
 203  * had before, in order to avoid retracting window.  This
 204  * is technically allowed, but RFC1122 advises against it and
 205  * in practice it causes trouble.
 206  */
 207         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 208                 return(sk->window);
 209         return(new_window);
 210 }
 211 
 212 /*
 213  *      Enter the time wait state. 
 214  */
 215 
 216 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 217 {
 218         sk->state = TCP_TIME_WAIT;
 219         sk->shutdown = SHUTDOWN_MASK;
 220         if (!sk->dead)
 221                 sk->state_change(sk);
 222         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 223 }
 224 
 225 /*
 226  *      A timer event has trigger a tcp retransmit timeout. The
 227  *      socket xmit queue is ready and set up to send. Because
 228  *      the ack receive code keeps the queue straight we do
 229  *      nothing clever here.
 230  */
 231 
 232 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 233 {
 234         if (all) 
 235         {
 236                 ip_retransmit(sk, all);
 237                 return;
 238         }
 239 
 240         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 241         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 242         sk->cong_count = 0;
 243 
 244         sk->cong_window = 1;
 245 
 246         /* Do the actual retransmit. */
 247         ip_retransmit(sk, all);
 248 }
 249 
 250 
 251 /*
 252  * This routine is called by the ICMP module when it gets some
 253  * sort of error condition.  If err < 0 then the socket should
 254  * be closed and the error returned to the user.  If err > 0
 255  * it's just the icmp type << 8 | icmp code.  After adjustment
 256  * header points to the first 8 bytes of the tcp header.  We need
 257  * to find the appropriate port.
 258  */
 259 
 260 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 261         unsigned long saddr, struct inet_protocol *protocol)
 262 {
 263         struct tcphdr *th;
 264         struct sock *sk;
 265         struct iphdr *iph=(struct iphdr *)header;
 266   
 267         header+=4*iph->ihl;
 268    
 269 
 270         th =(struct tcphdr *)header;
 271         sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 272 
 273         if (sk == NULL) 
 274                 return;
 275   
 276         if(err<0)
 277         {
 278                 sk->err = -err;
 279                 sk->error_report(sk);
 280                 return;
 281         }
 282 
 283         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 284         {
 285                 /*
 286                  * FIXME:
 287                  * For now we will just trigger a linear backoff.
 288                  * The slow start code should cause a real backoff here.
 289                  */
 290                 if (sk->cong_window > 4)
 291                         sk->cong_window--;
 292                 return;
 293         }
 294 
 295 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 296 
 297         /*
 298          * If we've already connected we will keep trying
 299          * until we time out, or the user gives up.
 300          */
 301 
 302         if (icmp_err_convert[err & 0xff].fatal) 
 303         {
 304                 if (sk->state == TCP_SYN_SENT) 
 305                 {
 306                         tcp_statistics.TcpAttemptFails++;
 307                         sk->state = TCP_CLOSE;
 308                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 309                 }
 310                 sk->err = icmp_err_convert[err & 0xff].errno;           
 311         }
 312         return;
 313 }
 314 
 315 
 316 /*
 317  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 318  *      in the received data queue (ie a frame missing that needs sending to us)
 319  */
 320 
 321 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 322 {
 323         unsigned long counted;
 324         unsigned long amount;
 325         struct sk_buff *skb;
 326         int sum;
 327         unsigned long flags;
 328 
 329         if(sk && sk->debug)
 330                 printk("tcp_readable: %p - ",sk);
 331 
 332         save_flags(flags);
 333         cli();
 334         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 335         {
 336                 restore_flags(flags);
 337                 if(sk && sk->debug) 
 338                         printk("empty\n");
 339                 return(0);
 340         }
 341   
 342         counted = sk->copied_seq+1;     /* Where we are at the moment */
 343         amount = 0;
 344   
 345         /* Do until a push or until we are out of data. */
 346         do 
 347         {
 348                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 349                         break;
 350                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 351                 if (skb->h.th->syn)
 352                         sum++;
 353                 if (sum >= 0) 
 354                 {                                       /* Add it up, move on */
 355                         amount += sum;
 356                         if (skb->h.th->syn) 
 357                                 amount--;
 358                         counted += sum;
 359                 }
 360                 if (amount && skb->h.th->psh) break;
 361                 skb = skb->next;
 362         }
 363         while(skb != (struct sk_buff *)&sk->receive_queue);
 364 
 365         if (amount && !sk->urginline && sk->urg_data &&
 366             (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 367                 amount--;               /* don't count urg data */
 368         restore_flags(flags);
 369         if(sk->debug)
 370                 printk("got %lu bytes.\n",amount);
 371         return(amount);
 372 }
 373 
 374 
 375 /*
 376  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 377  *      listening socket has a receive queue of sockets to accept.
 378  */
 379 
 380 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 381 {
 382         sk->inuse = 1;
 383 
 384         switch(sel_type) 
 385         {
 386                 case SEL_IN:
 387                         if(sk->debug)
 388                                 printk("select in");
 389                         select_wait(sk->sleep, wait);
 390                         if(sk->debug)
 391                                 printk("-select out");
 392                         if (skb_peek(&sk->receive_queue) != NULL) 
 393                         {
 394                                 if (sk->state == TCP_LISTEN || tcp_readable(sk)) 
 395                                 {
 396                                         release_sock(sk);
 397                                         if(sk->debug)
 398                                                 printk("-select ok data\n");
 399                                         return(1);
 400                                 }
 401                         }
 402                         if (sk->err != 0)       /* Receiver error */
 403                         {
 404                                 release_sock(sk);
 405                                 if(sk->debug)
 406                                         printk("-select ok error");
 407                                 return(1);
 408                         }
 409                         if (sk->shutdown & RCV_SHUTDOWN) 
 410                         {
 411                                 release_sock(sk);
 412                                 if(sk->debug)
 413                                         printk("-select ok down\n");
 414                                 return(1);
 415                         } 
 416                         else 
 417                         {
 418                                 release_sock(sk);
 419                                 if(sk->debug)
 420                                         printk("-select fail\n");
 421                                 return(0);
 422                         }
 423                 case SEL_OUT:
 424                         select_wait(sk->sleep, wait);
 425                         if (sk->shutdown & SEND_SHUTDOWN) 
 426                         {
 427                                 /* FIXME: should this return an error? */
 428                                 release_sock(sk);
 429                                 return(0);
 430                         }
 431 
 432                         /*
 433                          * FIXME:
 434                          * Hack so it will probably be able to write
 435                          * something if it says it's ok to write.
 436                          */
 437                         
 438                         if (sk->prot->wspace(sk) >= sk->mss) 
 439                         {
 440                                 release_sock(sk);
 441                                 /* This should cause connect to work ok. */
 442                                 if (sk->state == TCP_SYN_RECV ||
 443                                     sk->state == TCP_SYN_SENT) return(0);
 444                                 return(1);
 445                         }
 446                         release_sock(sk);
 447                         return(0);
 448                 case SEL_EX:
 449                         select_wait(sk->sleep,wait);
 450                         if (sk->err || sk->urg_data) 
 451                         {
 452                                 release_sock(sk);
 453                                 return(1);
 454                         }
 455                         release_sock(sk);
 456                         return(0);
 457         }
 458 
 459         release_sock(sk);
 460         return(0);
 461 }
 462 
 463 
 464 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 465 {
 466         int err;
 467         switch(cmd) 
 468         {
 469 
 470                 case TIOCINQ:
 471 #ifdef FIXME    /* FIXME: */
 472                 case FIONREAD:
 473 #endif
 474                 {
 475                         unsigned long amount;
 476 
 477                         if (sk->state == TCP_LISTEN) 
 478                                 return(-EINVAL);
 479 
 480                         sk->inuse = 1;
 481                         amount = tcp_readable(sk);
 482                         release_sock(sk);
 483                         err=verify_area(VERIFY_WRITE,(void *)arg,
 484                                                    sizeof(unsigned long));
 485                         if(err)
 486                                 return err;
 487                         put_fs_long(amount,(unsigned long *)arg);
 488                         return(0);
 489                 }
 490                 case SIOCATMARK:
 491                 {
 492                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 493 
 494                         err = verify_area(VERIFY_WRITE,(void *) arg,
 495                                                   sizeof(unsigned long));
 496                         if (err)
 497                                 return err;
 498                         put_fs_long(answ,(int *) arg);
 499                         return(0);
 500                 }
 501                 case TIOCOUTQ:
 502                 {
 503                         unsigned long amount;
 504 
 505                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 506                         amount = sk->prot->wspace(sk);
 507                         err=verify_area(VERIFY_WRITE,(void *)arg,
 508                                                    sizeof(unsigned long));
 509                         if(err)
 510                                 return err;
 511                         put_fs_long(amount,(unsigned long *)arg);
 512                         return(0);
 513                 }
 514                 default:
 515                         return(-EINVAL);
 516         }
 517 }
 518 
 519 
 520 /*
 521  *      This routine computes a TCP checksum. 
      *
      *      th    - start of the data to sum (the TCP header)
      *      len   - number of bytes to sum starting at th; it is also
      *              folded, byte swapped, into the pseudo header word
      *      saddr - source address; when 0, ip_my_addr() is used instead
      *      daddr - destination address
      *
      *      The sum starts from the two addresses plus a word built from
      *      len and IPPROTO_TCP, then adds the len bytes at th.  The
      *      value returned is the complement of the low 16 bits of the
      *      folded sum.  The code is i386 inline assembly.
 522  */
 523  
 524 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 525           unsigned long saddr, unsigned long daddr)
 526 {     
 527         unsigned long sum;
 528    
 529         if (saddr == 0) saddr = ip_my_addr();
 530 
 531 /*
 532  * stupid, gcc complains when I use just one __asm__ block,
 533  * something about too many reloads, but this is just two
 534  * instructions longer than what I want
 535  */
      /*
       * First block: sum = daddr + saddr + pseudo header word, with the
       * carries added back in.
       */
 536         __asm__("
 537             addl %%ecx, %%ebx
 538             adcl %%edx, %%ebx
 539             adcl $0, %%ebx
 540             "
 541         : "=b"(sum)
 542         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 543         : "bx", "cx", "dx" );
      /*
       * Second block: add the len bytes at th to the running sum.
       *   label 1 - unrolled loop, eight 32-bit loads (32 bytes) a pass
       *   label 3 - the remaining whole 32-bit words (len & 28)
       *   label 4 - a trailing 16-bit word, if bit 1 of len is set
       *   label 5 - a trailing byte, if bit 0 of len is set
       *   label 6 - fold the upper 16 bits of the sum into the lower 16
       * The original length is kept in %edx while %ecx is used as the
       * loop counter.
       */
 544         __asm__("
 545             movl %%ecx, %%edx
 546             cld
 547             cmpl $32, %%ecx
 548             jb 2f
 549             shrl $5, %%ecx
 550             clc
 551 1:          lodsl
 552             adcl %%eax, %%ebx
 553             lodsl
 554             adcl %%eax, %%ebx
 555             lodsl
 556             adcl %%eax, %%ebx
 557             lodsl
 558             adcl %%eax, %%ebx
 559             lodsl
 560             adcl %%eax, %%ebx
 561             lodsl
 562             adcl %%eax, %%ebx
 563             lodsl
 564             adcl %%eax, %%ebx
 565             lodsl
 566             adcl %%eax, %%ebx
 567             loop 1b
 568             adcl $0, %%ebx
 569             movl %%edx, %%ecx
 570 2:          andl $28, %%ecx
 571             je 4f
 572             shrl $2, %%ecx
 573             clc
 574 3:          lodsl
 575             adcl %%eax, %%ebx
 576             loop 3b
 577             adcl $0, %%ebx
 578 4:          movl $0, %%eax
 579             testw $2, %%dx
 580             je 5f
 581             lodsw
 582             addl %%eax, %%ebx
 583             adcl $0, %%ebx
 584             movw $0, %%ax
 585 5:          test $1, %%edx
 586             je 6f
 587             lodsb
 588             addl %%eax, %%ebx
 589             adcl $0, %%ebx
 590 6:          movl %%ebx, %%eax
 591             shrl $16, %%eax
 592             addw %%ax, %%bx
 593             adcw $0, %%bx
 594             "
 595         : "=b"(sum)
 596         : "0"(sum), "c"(len), "S"(th)
 597         : "ax", "bx", "cx", "dx", "si" );
 598 
 599         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 600   
 601         return((~sum) & 0xffff);
 602 }
 603 
 604 
 605 
 606 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 607                 unsigned long daddr, int len, struct sock *sk)
 608 {
 609         th->check = 0;
 610         th->check = tcp_check(th, len, saddr, daddr);
 611         return;
 612 }
 613 
 614 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 615 {
 616         int size;
 617         struct tcphdr * th = skb->h.th;
 618 
 619         /* length of packet (not counting length of pre-tcp headers) */
 620         size = skb->len - ((unsigned char *) th - skb->data);
 621 
 622         /* sanity check it.. */
 623         if (size < sizeof(struct tcphdr) || size > skb->len) 
 624         {
 625                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 626                         skb, skb->data, th, skb->len);
 627                 kfree_skb(skb, FREE_WRITE);
 628                 return;
 629         }
 630 
 631         /* If we have queued a header size packet.. */
 632         if (size == sizeof(struct tcphdr)) 
 633         {
 634                 /* If its got a syn or fin its notionally included in the size..*/
 635                 if(!th->syn && !th->fin) 
 636                 {
 637                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 638                         kfree_skb(skb,FREE_WRITE);
 639                         return;
 640                 }
 641         }
 642 
 643         tcp_statistics.TcpOutSegs++;  
 644 
 645         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 646         if (after(skb->h.seq, sk->window_seq) ||
 647             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 648              sk->packets_out >= sk->cong_window) 
 649         {
 650                 /* checksum will be supplied by tcp_write_xmit.  So
 651                  * we shouldn't need to set it at all.  I'm being paraoid */
 652                 th->check = 0;
 653                 if (skb->next != NULL) 
 654                 {
 655                         printk("tcp_send_partial: next != NULL\n");
 656                         skb_unlink(skb);
 657                 }
 658                 skb_queue_tail(&sk->write_queue, skb);
 659                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 660                     sk->send_head == NULL &&
 661                     sk->ack_backlog == 0)
 662                         reset_timer(sk, TIME_PROBE0, sk->rto);
 663         } 
 664         else 
 665         {
 666                 th->ack_seq = ntohl(sk->acked_seq);
 667                 th->window = ntohs(tcp_select_window(sk));
 668 
 669                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 670 
 671                 sk->sent_seq = sk->write_seq;
 672                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 673         }
 674 }
 675 
 676 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 677 {
 678         struct sk_buff * skb;
 679         unsigned long flags;
 680 
 681         save_flags(flags);
 682         cli();
 683         skb = sk->partial;
 684         if (skb) {
 685                 sk->partial = NULL;
 686                 del_timer(&sk->partial_timer);
 687         }
 688         restore_flags(flags);
 689         return skb;
 690 }
 691 
 692 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 693 {
 694         struct sk_buff *skb;
 695 
 696         if (sk == NULL)
 697                 return;
 698         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 699                 tcp_send_skb(sk, skb);
 700 }
 701 
 702 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 703 {
 704         struct sk_buff * tmp;
 705         unsigned long flags;
 706 
 707         save_flags(flags);
 708         cli();
 709         tmp = sk->partial;
 710         if (tmp)
 711                 del_timer(&sk->partial_timer);
 712         sk->partial = skb;
 713         init_timer(&sk->partial_timer);
 714         sk->partial_timer.expires = HZ;
 715         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 716         sk->partial_timer.data = (unsigned long) sk;
 717         add_timer(&sk->partial_timer);
 718         restore_flags(flags);
 719         if (tmp)
 720                 tcp_send_skb(sk, tmp);
 721 }
 722 
 723 
 724 /*
 725  *      This routine sends an ack and also updates the window. 
 726  */
 727  
 728 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 729              struct sock *sk,
 730              struct tcphdr *th, unsigned long daddr)
 731 {
 732         struct sk_buff *buff;
 733         struct tcphdr *t1;
 734         struct device *dev = NULL;
 735         int tmp;
 736 
 737         if(sk->zapped)
 738                 return;         /* We have been reset, we may not send again */
 739         /*
 740          * We need to grab some memory, and put together an ack,
 741          * and then put it into the queue to be sent.
 742          */
 743 
 744         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 745         if (buff == NULL) 
 746         {
 747                 /* Force it to send an ack. */
 748                 sk->ack_backlog++;
 749                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 750                 {
 751                         reset_timer(sk, TIME_WRITE, 10);
 752                 }
 753                 return;
 754         }
 755 
 756         buff->len = sizeof(struct tcphdr);
 757         buff->sk = sk;
 758         buff->localroute = sk->localroute;
 759         t1 =(struct tcphdr *) buff->data;
 760 
 761         /* Put in the IP header and routing stuff. */
 762         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 763                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 764         if (tmp < 0) 
 765         {
 766                 buff->free=1;
 767                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 768                 return;
 769         }
 770         buff->len += tmp;
 771         t1 =(struct tcphdr *)((char *)t1 +tmp);
 772 
 773         /* FIXME: */
 774         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 775 
 776         /*
 777          *      Swap the send and the receive. 
 778          */
 779          
 780         t1->dest = th->source;
 781         t1->source = th->dest;
 782         t1->seq = ntohl(sequence);
 783         t1->ack = 1;
 784         sk->window = tcp_select_window(sk);
 785         t1->window = ntohs(sk->window);
 786         t1->res1 = 0;
 787         t1->res2 = 0;
 788         t1->rst = 0;
 789         t1->urg = 0;
 790         t1->syn = 0;
 791         t1->psh = 0;
 792         t1->fin = 0;
 793         if (ack == sk->acked_seq) 
 794         {
 795                 sk->ack_backlog = 0;
 796                 sk->bytes_rcv = 0;
 797                 sk->ack_timed = 0;
 798                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 799                                   && sk->timeout == TIME_WRITE) 
 800                 {
 801                         if(sk->keepopen)
 802                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 803                         else
 804                                 delete_timer(sk);
 805                 }
 806         }
 807         t1->ack_seq = ntohl(ack);
 808         t1->doff = sizeof(*t1)/4;
 809         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 810         if (sk->debug)
 811                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 812         tcp_statistics.TcpOutSegs++;
 813         sk->prot->queue_xmit(sk, dev, buff, 1);
 814 }
 815 
 816 
 817 /* 
 818  *      This routine builds a generic TCP header. 
 819  */
 820  
 821 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 822 {
 823 
 824         /* FIXME: want to get rid of this. */
 825         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 826         th->seq = htonl(sk->write_seq);
 827         th->psh =(push == 0) ? 1 : 0;
 828         th->doff = sizeof(*th)/4;
 829         th->ack = 1;
 830         th->fin = 0;
 831         sk->ack_backlog = 0;
 832         sk->bytes_rcv = 0;
 833         sk->ack_timed = 0;
 834         th->ack_seq = htonl(sk->acked_seq);
 835         sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 836         th->window = htons(sk->window);
 837 
 838         return(sizeof(*th));
 839 }
 840 
 841 /*
 842  *      This routine copies from a user buffer into a socket,
 843  *      and starts the transmit system.
      *
      *      sk        socket to write on
      *      from      source buffer, read with memcpy_fromfs()
      *      len       number of bytes to send
      *      nonblock  non-zero: return -EAGAIN instead of sleeping
      *      flags     MSG_OOB and MSG_DONTROUTE are examined here
      *
      *      Returns the number of bytes copied into packets if any were
      *      copied.  Otherwise returns a negative error: -sk->err, -EPIPE,
      *      -EAGAIN, -ERESTARTSYS or the failure code from build_header().
      *      Returns 0 when len <= 0.
 844  */
 845 
 846 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 847           int len, int nonblock, unsigned flags)
 848 {
 849         int copied = 0;
 850         int copy;
 851         int tmp;
 852         struct sk_buff *skb;
 853         struct sk_buff *send_tmp;
 854         unsigned char *buff;
 855         struct proto *prot;
 856         struct device *dev = NULL;
 857 
             /*
              * Mark the socket busy.  Each path below calls release_sock()
              * before it returns or sleeps, and sets inuse again afterwards.
              */
 858         sk->inuse=1;
 859         prot = sk->prot;
 860         while(len > 0) 
 861         {
 862                 if (sk->err) 
 863                 {                       /* Stop on an error */
 864                         release_sock(sk);
 865                         if (copied) 
 866                                 return(copied);
 867                         tmp = -sk->err;
 868                         sk->err = 0;
 869                         return(tmp);
 870                 }
 871 
 872         /*
 873          *      First thing we do is make sure that we are established. 
 874          */
 875         
 876                 if (sk->shutdown & SEND_SHUTDOWN) 
 877                 {
 878                         release_sock(sk);
                             /*
                              * EPIPE stays in sk->err only when a partial
                              * count is returned; otherwise it is cleared
                              * and reported through the return value.
                              */
 879                         sk->err = EPIPE;
 880                         if (copied) 
 881                                 return(copied);
 882                         sk->err = 0;
 883                         return(-EPIPE);
 884                 }
 885 
 886 
 887         /* 
 888          *      Wait for a connection to finish.
 889          */
 890         
 891                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 892                 {
 893                         if (sk->err) 
 894                         {
 895                                 release_sock(sk);
 896                                 if (copied) 
 897                                         return(copied);
 898                                 tmp = -sk->err;
 899                                 sk->err = 0;
 900                                 return(tmp);
 901                         }
 902 
                             /*
                              * Neither connected nor in the middle of a
                              * handshake: the write cannot succeed.
                              */
 903                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 904                         {
 905                                 release_sock(sk);
 906                                 if (copied) 
 907                                         return(copied);
 908 
 909                                 if (sk->err) 
 910                                 {
 911                                         tmp = -sk->err;
 912                                         sk->err = 0;
 913                                         return(tmp);
 914                                 }
 915 
                                     /* SIGPIPE is only raised when keepopen is set. */
 916                                 if (sk->keepopen) 
 917                                 {
 918                                         send_sig(SIGPIPE, current, 0);
 919                                 }
 920                                 return(-EPIPE);
 921                         }
 922 
 923                         if (nonblock || copied) 
 924                         {
 925                                 release_sock(sk);
 926                                 if (copied) 
 927                                         return(copied);
 928                                 return(-EAGAIN);
 929                         }
 930 
 931                         release_sock(sk);
 932                         cli();
 933                 
                             /* Re-test the state after cli() before going to sleep. */
 934                         if (sk->state != TCP_ESTABLISHED &&
 935                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 936                         {
 937                                 interruptible_sleep_on(sk->sleep);
 938                                 if (current->signal & ~current->blocked) 
 939                                 {
 940                                         sti();
 941                                         if (copied) 
 942                                                 return(copied);
 943                                         return(-ERESTARTSYS);
 944                                 }
 945                         }
 946                         sk->inuse = 1;
 947                         sti();
 948                 }
 949 
 950         /*
 951          * The following code can result in copy <= 0 if sk->mss is ever
 952          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 953          * sk->mtu is constant once SYN processing is finished.  I.e. we
 954          * had better not get here until we've seen his SYN and at least one
 955          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 956          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 957          * non-decreasing.  Note that any ioctl to set user_mss must be done
 958          * before the exchange of SYN's.  If the initial ack from the other
 959          * end has a window of 0, max_window and thus mss will both be 0.
 960          */
 961 
 962         /* 
 963          *      Now we need to check if we have a half built packet. 
 964          */
 965 
 966                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
 967                 {
 968                         int hdrlen;
 969 
 970                          /* IP header + TCP header */
 971                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 972                                  + sizeof(struct tcphdr);
 973         
 974                         /* Add more stuff to the end of skb->len */
 975                         if (!(flags & MSG_OOB)) 
 976                         {
                                     /* Room left up to one mss of payload, capped by len. */
 977                                 copy = min(sk->mss - (skb->len - hdrlen), len);
 978                                 /* FIXME: this is really a bug. */
 979                                 if (copy <= 0) 
 980                                 {
 981                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
 982                                         copy = 0;
 983                                 }
 984           
 985                                 memcpy_fromfs(skb->data + skb->len, from, copy);
 986                                 skb->len += copy;
 987                                 from += copy;
 988                                 copied += copy;
 989                                 len -= copy;
 990                                 sk->write_seq += copy;
 991                         }
                             /*
                              * Send now if the packet is full, if this is
                              * urgent data, or if nothing is in flight;
                              * otherwise keep it as the partial packet.
                              */
 992                         if ((skb->len - hdrlen) >= sk->mss ||
 993                                 (flags & MSG_OOB) || !sk->packets_out)
 994                                 tcp_send_skb(sk, skb);
 995                         else
 996                                 tcp_enqueue_partial(skb, sk);
 997                         continue;
 998                 }
 999 
1000         /*
1001          * We also need to worry about the window.
1002          * If window < 1/2 the maximum window we've seen from this
1003          *   host, don't use it.  This is sender side
1004          *   silly window prevention, as specified in RFC1122.
1005          *   (Note that this is different than earlier versions of
1006          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1007          *   use the whole MSS.  Since this results in the right
1008          *   edge of the packet being outside the window, it will
1009          *   be queued for later rather than sent.
1010          */
1011 
1012                 copy = sk->window_seq - sk->write_seq;
1013                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1014                         copy = sk->mss;
1015                 if (copy > len)
1016                         copy = len;
1017 
1018         /*
1019          *      We should really check the window here also. 
1020          */
1021          
                     /*
                      * send_tmp stays NULL unless this packet is short enough
                      * to be held back as a partial packet; in that case it
                      * gets an mtu-sized buffer so more data can be appended.
                      */
1022                 send_tmp = NULL;
1023                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1024                 {
1025                         /*
1026                          *      We will release the socket in case we sleep here. 
1027                          */
1028                         release_sock(sk);
1029                         /*
1030                          *      NB: following must be mtu, because mss can be increased.
1031                          *      mss is always <= mtu 
1032                          */
1033                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1034                         sk->inuse = 1;
1035                         send_tmp = skb;
1036                 } 
1037                 else 
1038                 {
1039                         /*
1040                          *      We will release the socket in case we sleep here. 
1041                          */
1042                         release_sock(sk);
1043                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1044                         sk->inuse = 1;
1045                 }
1046 
1047                 /*
1048                  *      If we didn't get any memory, we need to sleep. 
1049                  */
1050 
1051                 if (skb == NULL) 
1052                 {
1053                         if (nonblock /* || copied */) 
1054                         {
1055                                 release_sock(sk);
1056                                 if (copied) 
1057                                         return(copied);
1058                                 return(-EAGAIN);
1059                         }
1060 
1061                         /*
1062                          *      FIXME: here is another race condition. 
1063                          */
1064 
                             /*
                              * Remember wmem_alloc so the test below can skip
                              * the sleep if it dropped in the meantime.
                              */
1065                         tmp = sk->wmem_alloc;
1066                         release_sock(sk);
1067                         cli();
1068                         /*
1069                          *      Again we will try to avoid it. 
1070                          */
1071                         if (tmp <= sk->wmem_alloc &&
1072                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1073                                 && sk->err == 0) 
1074                         {
1075                                 interruptible_sleep_on(sk->sleep);
1076                                 if (current->signal & ~current->blocked) 
1077                                 {
1078                                         sti();
1079                                         if (copied) 
1080                                                 return(copied);
1081                                         return(-ERESTARTSYS);
1082                                 }
1083                         }
1084                         sk->inuse = 1;
1085                         sti();
1086                         continue;
1087                 }
1088 
1089                 skb->len = 0;
1090                 skb->sk = sk;
1091                 skb->free = 0;
1092                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1093         
1094                 buff = skb->data;
1095         
1096                 /*
1097                  * FIXME: we need to optimize this.
1098                  * Perhaps some hints here would be good.
1099                  */
1100                 
1101                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1102                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1103                 if (tmp < 0 ) 
1104                 {
1105                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1106                         release_sock(sk);
1107                         if (copied) 
1108                                 return(copied);
1109                         return(tmp);
1110                 }
1111                 skb->len += tmp;
1112                 skb->dev = dev;
1113                 buff += tmp;
1114                 skb->h.th =(struct tcphdr *) buff;
                     /*
                      * The third argument is "push": it is zero when this
                      * packet takes the last of the caller's data.
                      */
1115                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
                     /*
                      * NOTE(review): tcp_build_header() as written in this file
                      * always returns sizeof(*th), so this branch is not
                      * expected to be taken.
                      */
1116                 if (tmp < 0) 
1117                 {
1118                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1119                         release_sock(sk);
1120                         if (copied) 
1121                                 return(copied);
1122                         return(tmp);
1123                 }
1124 
1125                 if (flags & MSG_OOB) 
1126                 {
                             /* Urgent pointer is set to the length of this packet's data. */
1127                         ((struct tcphdr *)buff)->urg = 1;
1128                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1129                 }
1130                 skb->len += tmp;
1131                 memcpy_fromfs(buff+tmp, from, copy);
1132 
1133                 from += copy;
1134                 copied += copy;
1135                 len -= copy;
1136                 skb->len += copy;
1137                 skb->free = 0;
1138                 sk->write_seq += copy;
1139         
                     /* A short packet is held back while other packets are in flight. */
1140                 if (send_tmp != NULL && sk->packets_out) 
1141                 {
1142                         tcp_enqueue_partial(send_tmp, sk);
1143                         continue;
1144                 }
1145                 tcp_send_skb(sk, skb);
1146         }
1147         sk->err = 0;
1148 
1149 /*
1150  *      Nagles rule. Turn Nagle off with TCP_NODELAY for highly
1151  *      interactive fast network servers. It's meant to be on and
1152  *      it really improves the throughput though not the echo time
1153  *      on my slow slip link - Alan
1154  */
1155 
1156 /*
1157  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1158  */
1159  
1160         if(sk->partial && ((!sk->packets_out) 
1161      /* If not nagling we can send on the before case too.. */
1162               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1163         ))
1164                 tcp_send_partial(sk);
1165 
1166         release_sock(sk);
1167         return(copied);
1168 }
1169 
1170 
1171 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1172            int len, int nonblock, unsigned flags,
1173            struct sockaddr_in *addr, int addr_len)
1174 {
1175         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1176                 return -EINVAL;
1177         if (addr_len < sizeof(*addr)) 
1178                 return(-EINVAL);
1179         if (addr->sin_family && addr->sin_family != AF_INET) 
1180                 return(-EINVAL);
1181         if (addr->sin_port != sk->dummy_th.dest) 
1182                 return(-EISCONN);
1183         if (addr->sin_addr.s_addr != sk->daddr) 
1184                 return(-EISCONN);
1185         return(tcp_write(sk, from, len, nonblock, flags));
1186 }
1187 
1188 
1189 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1190 {
1191         int tmp;
1192         struct device *dev = NULL;
1193         struct tcphdr *t1;
1194         struct sk_buff *buff;
1195 
1196         if (!sk->ack_backlog) 
1197                 return;
1198 
1199         /*
1200          * FIXME: we need to put code here to prevent this routine from
1201          * being called.  Being called once in a while is ok, so only check
1202          * if this is the second time in a row.
1203          */
1204 
1205         /*
1206          * We need to grab some memory, and put together an ack,
1207          * and then put it into the queue to be sent.
1208          */
1209 
1210         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1211         if (buff == NULL) 
1212         {
1213                 /* Try again real soon. */
1214                 reset_timer(sk, TIME_WRITE, 10);
1215                 return;
1216         }
1217 
1218         buff->len = sizeof(struct tcphdr);
1219         buff->sk = sk;
1220         buff->localroute = sk->localroute;
1221         
1222         /*
1223          *      Put in the IP header and routing stuff. 
1224          */
1225 
1226         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1227                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1228         if (tmp < 0) 
1229         {
1230                 buff->free=1;
1231                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1232                 return;
1233         }
1234 
1235         buff->len += tmp;
1236         t1 =(struct tcphdr *)(buff->data +tmp);
1237 
1238         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1239         t1->seq = htonl(sk->sent_seq);
1240         t1->ack = 1;
1241         t1->res1 = 0;
1242         t1->res2 = 0;
1243         t1->rst = 0;
1244         t1->urg = 0;
1245         t1->syn = 0;
1246         t1->psh = 0;
1247         sk->ack_backlog = 0;
1248         sk->bytes_rcv = 0;
1249         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1250         t1->window = ntohs(sk->window);
1251         t1->ack_seq = ntohl(sk->acked_seq);
1252         t1->doff = sizeof(*t1)/4;
1253         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1254         sk->prot->queue_xmit(sk, dev, buff, 1);
1255         tcp_statistics.TcpOutSegs++;
1256 }
1257 
1258 
1259 /*
1260  *      FIXME:
1261  *      This routine frees used buffers.
1262  *      It should consider sending an ACK to let the
1263  *      other end know we now have a bigger window.
      *
      *      Buffers at the head of sk->receive_queue that are marked
      *      "used" are unlinked and freed; the walk stops at the first
      *      buffer that is not.  If the receive space reported by
      *      sk->prot->rspace() changed as a result, an ack is counted in
      *      sk->ack_backlog and is either sent at once through
      *      tcp_read_wakeup() or left to the socket timer.
1264  */
1265 
1266 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1267 {
1268         unsigned long flags;
1269         unsigned long left;
1270         struct sk_buff *skb;
1271         unsigned long rspace;
1272 
1273         if(sk->debug)
1274                 printk("cleaning rbuf for sk=%p\n", sk);
1275   
1276         save_flags(flags);
1277         cli();
1278   
             /* Receive space before anything is freed, for the comparison below. */
1279         left = sk->prot->rspace(sk);
1280  
1281         /*
1282          * We have to loop through all the buffer headers,
1283          * and try to free up all the space we can.
1284          */
1285 
1286         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1287         {
                     /* Stop at the first buffer that has not been fully read. */
1288                 if (!skb->used) 
1289                         break;
1290                 skb_unlink(skb);
1291                 skb->sk = sk;
1292                 kfree_skb(skb, FREE_READ);
1293         }
1294 
1295         restore_flags(flags);
1296 
1297         /*
1298          * FIXME:
1299          * At this point we should send an ack if the difference
1300          * in the window, and the amount of space is bigger than
1301          * TCP_WINDOW_DIFF.
1302          */
1303 
1304         if(sk->debug)
1305                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1306                                             left);
1307         if ((rspace=sk->prot->rspace(sk)) != left) 
1308         {
1309                 /*
1310                  * This area has caused the most trouble.  The current strategy
1311                  * is to simply do nothing if the other end has room to send at
1312                  * least 3 full packets, because the ack from those will auto-
1313                  * matically update the window.  If the other end doesn't think
1314                  * we have much space left, but we have room for at least 1 more
1315                  * complete packet than it thinks we do, we will send an ack
1316                  * immediately.  Otherwise we will wait up to .5 seconds in case
1317                  * the user reads some more.
1318                  */
1319                 sk->ack_backlog++;
1320         /*
1321          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1322          * if the other end is offering a window smaller than the agreed on MSS
1323          * (called sk->mtu here).  In theory there's no connection between send
1324          * and receive, and so no reason to think that they're going to send
1325          * small packets.  For the moment I'm using the hack of reducing the mss
1326          * only on the send side, so I'm putting mtu here.
1327          */
1328 
1329                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1330                 {
1331                         /* Send an ack right now. */
1332                         tcp_read_wakeup(sk);
1333                 } 
1334                 else 
1335                 {
1336                         /* Force it to send an ack soon. */
                             /*
                              * Take the timer off the list first.  It is
                              * re-armed for TCP_ACK_TIME if it was idle or
                              * was set to expire later than that; otherwise
                              * it is put back unchanged.
                              */
1337                         int was_active = del_timer(&sk->timer);
1338                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1339                         {
1340                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1341                         } 
1342                         else
1343                                 add_timer(&sk->timer);
1344                 }
1345         }
1346 } 
1347 
1348 
1349 /*
1350  *      Handle reading urgent data. 
      *
      *      sk        socket to read from
      *      nonblock  non-zero: return -EAGAIN instead of sleeping
      *      to        destination, written with put_fs_byte()
      *      len       size of the destination; at most one byte is stored
      *      flags     MSG_PEEK leaves the urgent byte unread
      *
      *      Returns 1 when the urgent byte was stored.  Returns 0 at end
      *      of stream (receive side shut down, or the first read on a
      *      closed socket) or when len <= 0.  Returns -EINVAL when urgent
      *      data is delivered inline, absent or already read.  Otherwise
      *      returns -sk->err, -ENOTCONN, -EAGAIN or -ERESTARTSYS.
1351  */
1352  
1353 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1354              unsigned char *to, int len, unsigned flags)
1355 {
1356         struct wait_queue wait = { current, NULL };
1357 
1358         while (len > 0) 
1359         {
1360                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1361                         return -EINVAL;
1362                 if (sk->urg_data & URG_VALID) 
1363                 {
                             /* Converting to char keeps only the low byte of urg_data. */
1364                         char c = sk->urg_data;
1365                         if (!(flags & MSG_PEEK))
1366                                 sk->urg_data = URG_READ;
1367                         put_fs_byte(c, to);
1368                         return 1;
1369                 }
1370 
1371                 if (sk->err) 
1372                 {
1373                         int tmp = -sk->err;
1374                         sk->err = 0;
1375                         return tmp;
1376                 }
1377 
1378                 if (sk->state == TCP_CLOSE || sk->done) 
1379                 {
                             /* The first read after close reports end of stream, later ones an error. */
1380                         if (!sk->done) {
1381                                 sk->done = 1;
1382                                 return 0;
1383                         }
1384                         return -ENOTCONN;
1385                 }
1386 
1387                 if (sk->shutdown & RCV_SHUTDOWN) 
1388                 {
1389                         sk->done = 1;
1390                         return 0;
1391                 }
1392 
1393                 if (nonblock)
1394                         return -EAGAIN;
1395 
1396                 if (current->signal & ~current->blocked)
1397                         return -ERESTARTSYS;
1398 
                     /*
                      * Queue ourselves on the socket's wait queue, then sleep
                      * only while the urgent byte is still announced but not
                      * yet received (URG_NOTYET) and nothing else has changed.
                      */
1399                 current->state = TASK_INTERRUPTIBLE;
1400                 add_wait_queue(sk->sleep, &wait);
1401                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1402                     !(sk->shutdown & RCV_SHUTDOWN))
1403                         schedule();
1404                 remove_wait_queue(sk->sleep, &wait);
1405                 current->state = TASK_RUNNING;
1406         }
1407         return 0;
1408 }
1409 
1410 
1411 /*
1412  *      This routine copies from a sock struct into the user buffer. 
      *
      *      sk        socket to read from
      *      to        destination, written with memcpy_tofs()
      *      len       maximum number of bytes to copy
      *      nonblock  non-zero: return -EAGAIN instead of sleeping
      *      flags     MSG_OOB hands the call to tcp_read_urg(); MSG_PEEK
      *                copies data without advancing sk->copied_seq
      *
      *      Returns the number of bytes copied.  Returns 0 at end of
      *      stream (receive side shut down, or the first read on a closed
      *      socket).  Otherwise returns a negative error: -ENOTCONN,
      *      -sk->err, -EAGAIN or -ERESTARTSYS.
1413  */
1414  
1415 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1416         int len, int nonblock, unsigned flags)
1417 {
1418         struct wait_queue wait = { current, NULL };
1419         int copied = 0;
1420         unsigned long peek_seq;
1421         unsigned long *seq;
1422         unsigned long used;
1423 
1424         /* This error should be checked. */
1425         if (sk->state == TCP_LISTEN)
1426                 return -ENOTCONN;
1427 
1428         /* Urgent data needs to be handled specially. */
1429         if (flags & MSG_OOB)
1430                 return tcp_read_urg(sk, nonblock, to, len, flags);
1431 
             /*
              * "seq" is the read position that gets advanced.  It is the
              * socket's own copied_seq normally, or a private copy when
              * peeking so that the socket's position is left alone.
              */
1432         peek_seq = sk->copied_seq;
1433         seq = &sk->copied_seq;
1434         if (flags & MSG_PEEK)
1435                 seq = &peek_seq;
1436 
1437         add_wait_queue(sk->sleep, &wait);
1438         sk->inuse = 1;
1439         while (len > 0) 
1440         {
1441                 struct sk_buff * skb;
1442                 unsigned long offset;
1443         
1444                 /*
1445                  * are we at urgent data? Stop if we have read anything.
1446                  */
1447                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1448                         break;
1449 
                     /*
                      * NOTE(review): the state is set before the queue is
                      * scanned, presumably so that a wakeup arriving before
                      * schedule() below is not lost -- confirm.
                      */
1450                 current->state = TASK_INTERRUPTIBLE;
1451 
                     /*
                      * Walk the receive queue looking for the buffer that
                      * holds the next byte to read (sequence 1 + *seq).
                      */
1452                 skb = skb_peek(&sk->receive_queue);
1453                 do 
1454                 {
1455                         if (!skb)
1456                                 break;
                             /* This buffer starts beyond the next byte wanted: stop. */
1457                         if (before(1+*seq, skb->h.th->seq))
1458                                 break;
1459                         offset = 1 + *seq - skb->h.th->seq;
                             /* A SYN uses up one sequence number but carries no data byte. */
1460                         if (skb->h.th->syn)
1461                                 offset--;
1462                         if (offset < skb->len)
1463                                 goto found_ok_skb;
                             /* Buffer already consumed; mark it for cleanup_rbuf() unless peeking. */
1464                         if (!(flags & MSG_PEEK))
1465                                 skb->used = 1;
1466                         skb = skb->next;
1467                 }
1468                 while (skb != (struct sk_buff *)&sk->receive_queue);
1469 
                     /* No more data is ready: return what we have rather than wait. */
1470                 if (copied)
1471                         break;
1472 
1473                 if (sk->err) 
1474                 {
1475                         copied = -sk->err;
1476                         sk->err = 0;
1477                         break;
1478                 }
1479 
1480                 if (sk->state == TCP_CLOSE) 
1481                 {
                             /* The first read after close returns 0, later ones -ENOTCONN. */
1482                         if (!sk->done) 
1483                         {
1484                                 sk->done = 1;
1485                                 break;
1486                         }
1487                         copied = -ENOTCONN;
1488                         break;
1489                 }
1490 
1491                 if (sk->shutdown & RCV_SHUTDOWN) 
1492                 {
1493                         sk->done = 1;
1494                         break;
1495                 }
1496                         
1497                 if (nonblock) 
1498                 {
1499                         copied = -EAGAIN;
1500                         break;
1501                 }
1502 
                     /* Free what has been read and release the socket before sleeping. */
1503                 cleanup_rbuf(sk);
1504                 release_sock(sk);
1505                 schedule();
1506                 sk->inuse = 1;
1507 
1508                 if (current->signal & ~current->blocked) 
1509                 {
1510                         copied = -ERESTARTSYS;
1511                         break;
1512                 }
1513                 continue;
1514 
1515         found_ok_skb:
1516                 /* Ok so how much can we use ? */
1517                 used = skb->len - offset;
1518                 if (len < used)
1519                         used = len;
1520                 /* do we have urgent data here? */
1521                 if (sk->urg_data) 
1522                 {
1523                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1524                         if (urg_offset < used) 
1525                         {
1526                                 if (!urg_offset) 
1527                                 {
                                             /*
                                              * The urgent byte is the next byte: step
                                              * over it unless it is delivered inline.
                                              */
1528                                         if (!sk->urginline) 
1529                                         {
1530                                                 ++*seq;
1531                                                 offset++;
1532                                                 used--;
1533                                         }
1534                                 }
1535                                 else
                                             /* Copy only up to the urgent byte this time round. */
1536                                         used = urg_offset;
1537                         }
1538                 }
1539                 /* Copy it */
1540                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1541                         skb->h.th->doff*4 + offset, used);
1542                 copied += used;
1543                 len -= used;
1544                 to += used;
1545                 *seq += used;
                     /*
                      * This tests sk->copied_seq rather than *seq, so a
                      * MSG_PEEK read does not clear the urgent state here.
                      */
1546                 if (after(sk->copied_seq+1,sk->urg_seq))
1547                         sk->urg_data = 0;
1548                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1549                         skb->used = 1;
1550         }
1551         remove_wait_queue(sk->sleep, &wait);
1552         current->state = TASK_RUNNING;
1553 
1554         /* Clean up data we have read: This will do ACK frames */
1555         cleanup_rbuf(sk);
1556         release_sock(sk);
1557         return copied;
1558 }
1559 
1560  
1561 /*
1562  *      Shutdown the sending side of a connection.
1563  */
1564 
1565 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1566 {
1567         struct sk_buff *buff;
1568         struct tcphdr *t1, *th;
1569         struct proto *prot;
1570         int tmp;
1571         struct device *dev = NULL;
1572 
1573         /*
1574          * We need to grab some memory, and put together a FIN,
1575          * and then put it into the queue to be sent.
1576          * FIXME:
1577          *
1578          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1579          *      Most of this is guesswork, so maybe it will work...
1580          */
1581 
1582         if (!(how & SEND_SHUTDOWN)) 
1583                 return;
1584          
1585         /*
1586          *      If we've already sent a FIN, return. 
1587          */
1588          
1589         if (sk->state == TCP_FIN_WAIT1 ||
1590             sk->state == TCP_FIN_WAIT2 ||
1591             sk->state == TCP_CLOSING ||
1592             sk->state == TCP_LAST_ACK ||
1593             sk->state == TCP_TIME_WAIT
1594         ) 
1595         {
1596                 return;
1597         }
1598         sk->inuse = 1;
1599 
1600         /*
1601          * flag that the sender has shutdown
1602          */
1603 
1604         sk->shutdown |= SEND_SHUTDOWN;
1605 
1606         /*
1607          *  Clear out any half completed packets. 
1608          */
1609 
1610         if (sk->partial)
1611                 tcp_send_partial(sk);
1612 
1613         prot =(struct proto *)sk->prot;
1614         th =(struct tcphdr *)&sk->dummy_th;
1615         release_sock(sk); /* incase the malloc sleeps. */
1616         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1617         if (buff == NULL)
1618                 return;
1619         sk->inuse = 1;
1620 
1621         buff->sk = sk;
1622         buff->len = sizeof(*t1);
1623         buff->localroute = sk->localroute;
1624         t1 =(struct tcphdr *) buff->data;
1625 
1626         /*
1627          *      Put in the IP header and routing stuff. 
1628          */
1629 
1630         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1631                            IPPROTO_TCP, sk->opt,
1632                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1633         if (tmp < 0) 
1634         {
1635                 /*
1636                  *      Finish anyway, treat this as a send that got lost. 
1637                  *
1638                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1639                  *      written data to be completely acknowledged along
1640                  *      with an acknowledge to our FIN.
1641                  *
1642                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1643                  *      connection established.
1644                  */
1645                 buff->free=1;
1646                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1647 
1648                 if (sk->state == TCP_ESTABLISHED)
1649                         sk->state = TCP_FIN_WAIT1;
1650                 else if(sk->state == TCP_CLOSE_WAIT)
1651                         sk->state = TCP_LAST_ACK;
1652                 else
1653                         sk->state = TCP_FIN_WAIT2;
1654 
1655                 release_sock(sk);
1656                 return;
1657         }
1658 
1659         t1 =(struct tcphdr *)((char *)t1 +tmp);
1660         buff->len += tmp;
1661         buff->dev = dev;
1662         memcpy(t1, th, sizeof(*t1));
1663         t1->seq = ntohl(sk->write_seq);
1664         sk->write_seq++;
1665         buff->h.seq = sk->write_seq;
1666         t1->ack = 1;
1667         t1->ack_seq = ntohl(sk->acked_seq);
1668         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1669         t1->fin = 1;
1670         t1->rst = 0;
1671         t1->doff = sizeof(*t1)/4;
1672         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1673 
1674         /*
1675          * Can't just queue this up.
1676          * It should go at the end of the write queue.
1677          */
1678         
1679         if (skb_peek(&sk->write_queue) != NULL) 
1680         {
1681                 buff->free=0;
1682                 if (buff->next != NULL) 
1683                 {
1684                         printk("tcp_shutdown: next != NULL\n");
1685                         skb_unlink(buff);
1686                 }
1687                 skb_queue_tail(&sk->write_queue, buff);
1688         } 
1689         else 
1690         {
1691                 sk->sent_seq = sk->write_seq;
1692                 sk->prot->queue_xmit(sk, dev, buff, 0);
1693         }
1694 
1695         if (sk->state == TCP_ESTABLISHED) 
1696                 sk->state = TCP_FIN_WAIT1;
1697         else if (sk->state == TCP_CLOSE_WAIT)
1698                 sk->state = TCP_LAST_ACK;
1699         else
1700                 sk->state = TCP_FIN_WAIT2;
1701 
1702         release_sock(sk);
1703 }
1704 
1705 
1706 static int
1707 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1708              int to_len, int nonblock, unsigned flags,
1709              struct sockaddr_in *addr, int *addr_len)
1710 {
1711         int result;
1712   
1713         /* 
1714          *      Have to check these first unlike the old code. If 
1715          *      we check them after we lose data on an error
1716          *      which is wrong 
1717          */
1718 
1719         if(addr_len)
1720                 *addr_len = sizeof(*addr);
1721         result=tcp_read(sk, to, to_len, nonblock, flags);
1722 
1723         if (result < 0) 
1724                 return(result);
1725   
1726         if(addr)
1727         {
1728                 addr->sin_family = AF_INET;
1729                 addr->sin_port = sk->dummy_th.dest;
1730                 addr->sin_addr.s_addr = sk->daddr;
1731         }
1732         return(result);
1733 }
1734 
1735 
1736 /*
1737  *      This routine will send an RST to the other tcp. 
1738  */
1739  
1740 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1741           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1742 {
1743         struct sk_buff *buff;
1744         struct tcphdr *t1;
1745         int tmp;
1746         struct device *ndev=NULL;
1747   
1748 /*
1749  * We need to grab some memory, and put together an RST,
1750  * and then put it into the queue to be sent.
1751  */
1752 
1753         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1754         if (buff == NULL) 
1755                 return;
1756 
1757         buff->len = sizeof(*t1);
1758         buff->sk = NULL;
1759         buff->dev = dev;
1760         buff->localroute = 0;
1761 
1762         t1 =(struct tcphdr *) buff->data;
1763 
1764         /*
1765          *      Put in the IP header and routing stuff. 
1766          */
1767 
1768         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1769                            sizeof(struct tcphdr),tos,ttl);
1770         if (tmp < 0) 
1771         {
1772                 buff->free = 1;
1773                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1774                 return;
1775         }
1776 
1777         t1 =(struct tcphdr *)((char *)t1 +tmp);
1778         buff->len += tmp;
1779         memcpy(t1, th, sizeof(*t1));
1780 
1781         /*
1782          *      Swap the send and the receive. 
1783          */
1784 
1785         t1->dest = th->source;
1786         t1->source = th->dest;
1787         t1->rst = 1;  
1788         t1->window = 0;
1789   
1790         if(th->ack)
1791         {
1792                 t1->ack = 0;
1793                 t1->seq = th->ack_seq;
1794                 t1->ack_seq = 0;
1795         }
1796         else
1797         {
1798                 t1->ack = 1;
1799                 if(!th->syn)
1800                         t1->ack_seq=htonl(th->seq);
1801                 else
1802                         t1->ack_seq=htonl(th->seq+1);
1803                 t1->seq=0;
1804         }
1805 
1806         t1->syn = 0;
1807         t1->urg = 0;
1808         t1->fin = 0;
1809         t1->psh = 0;
1810         t1->doff = sizeof(*t1)/4;
1811         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1812         prot->queue_xmit(NULL, dev, buff, 1);
1813         tcp_statistics.TcpOutSegs++;
1814 }
1815 
1816 
1817 /*
1818  *      Look for tcp options. Parses everything but only knows about MSS.
1819  *      This routine is always called with the packet containing the SYN.
1820  *      However it may also be called with the ack to the SYN.  So you
1821  *      can't assume this is always the SYN.  It's always called after
1822  *      we have set up sk->mtu to our own MTU.
1823  */
1824  
1825 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1826 {
1827         unsigned char *ptr;
1828         int length=(th->doff*4)-sizeof(struct tcphdr);
1829         int mss_seen = 0;
1830     
1831         ptr = (unsigned char *)(th + 1);
1832   
1833         while(length>0)
1834         {
1835                 int opcode=*ptr++;
1836                 int opsize=*ptr++;
1837                 switch(opcode)
1838                 {
1839                         case TCPOPT_EOL:
1840                                 return;
1841                         case TCPOPT_NOP:
1842                                 length-=2;
1843                                 continue;
1844                         
1845                         default:
1846                                 if(opsize<=2)   /* Avoid silly options looping forever */
1847                                         return;
1848                                 switch(opcode)
1849                                 {
1850                                         case TCPOPT_MSS:
1851                                                 if(opsize==4 && th->syn)
1852                                                 {
1853                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1854                                                         mss_seen = 1;
1855                                                 }
1856                                                 break;
1857                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1858                                 }
1859                                 ptr+=opsize-2;
1860                                 length-=opsize;
1861                 }
1862         }
1863         if (th->syn) 
1864         {
1865                 if (! mss_seen)
1866                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1867         }
1868 #ifdef CONFIG_INET_PCTCP
1869         sk->mss = min(sk->max_window >> 1, sk->mtu);
1870 #else    
1871         sk->mss = min(sk->max_window, sk->mtu);
1872 #endif  
1873 }
1874 
/*
 *	Return the classful network mask for address dst.
 *	dst is taken in network byte order and the mask is returned in
 *	network byte order.  Anything that is neither class A nor class B
 *	gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_order = ntohl(dst);
	unsigned long net_bits;

	if (IN_CLASSA(host_order))
		net_bits = IN_CLASSA_NET;
	else if (IN_CLASSB(host_order))
		net_bits = IN_CLASSB_NET;
	else
		net_bits = IN_CLASSC_NET;

	return htonl(net_bits);
}
1884 
1885 /*
1886  *      This routine handles a connection request.
1887  *      It should make sure we haven't already responded.
1888  *      Because of the way BSD works, we have to send a syn/ack now.
1889  *      This also means it will be harder to close a socket which is
1890  *      listening.
1891  */
1892  
1893 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1894                  unsigned long daddr, unsigned long saddr,
1895                  struct options *opt, struct device *dev)
1896 {
1897         struct sk_buff *buff;
1898         struct tcphdr *t1;
1899         unsigned char *ptr;
1900         struct sock *newsk;
1901         struct tcphdr *th;
1902         struct device *ndev=NULL;
1903         int tmp;
1904         struct rtable *rt;
1905   
1906         th = skb->h.th;
1907 
1908         /* If the socket is dead, don't accept the connection. */
1909         if (!sk->dead) 
1910         {
1911                 sk->data_ready(sk,0);
1912         }
1913         else 
1914         {
1915                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1916                 tcp_statistics.TcpAttemptFails++;
1917                 kfree_skb(skb, FREE_READ);
1918                 return;
1919         }
1920 
1921         /*
1922          * Make sure we can accept more.  This will prevent a
1923          * flurry of syns from eating up all our memory.
1924          */
1925 
1926         if (sk->ack_backlog >= sk->max_ack_backlog) 
1927         {
1928                 tcp_statistics.TcpAttemptFails++;
1929                 kfree_skb(skb, FREE_READ);
1930                 return;
1931         }
1932 
1933         /*
1934          * We need to build a new sock struct.
1935          * It is sort of bad to have a socket without an inode attached
1936          * to it, but the wake_up's will just wake up the listening socket,
1937          * and if the listening socket is destroyed before this is taken
1938          * off of the queue, this will take care of it.
1939          */
1940 
1941         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1942         if (newsk == NULL) 
1943         {
1944                 /* just ignore the syn.  It will get retransmitted. */
1945                 tcp_statistics.TcpAttemptFails++;
1946                 kfree_skb(skb, FREE_READ);
1947                 return;
1948         }
1949 
1950         memcpy(newsk, sk, sizeof(*newsk));
1951         skb_queue_head_init(&newsk->write_queue);
1952         skb_queue_head_init(&newsk->receive_queue);
1953         newsk->send_head = NULL;
1954         newsk->send_tail = NULL;
1955         skb_queue_head_init(&newsk->back_log);
1956         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
1957         newsk->rto = TCP_TIMEOUT_INIT;
1958         newsk->mdev = 0;
1959         newsk->max_window = 0;
1960         newsk->cong_window = 1;
1961         newsk->cong_count = 0;
1962         newsk->ssthresh = 0;
1963         newsk->backoff = 0;
1964         newsk->blog = 0;
1965         newsk->intr = 0;
1966         newsk->proc = 0;
1967         newsk->done = 0;
1968         newsk->partial = NULL;
1969         newsk->pair = NULL;
1970         newsk->wmem_alloc = 0;
1971         newsk->rmem_alloc = 0;
1972         newsk->localroute = sk->localroute;
1973 
1974         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1975 
1976         newsk->err = 0;
1977         newsk->shutdown = 0;
1978         newsk->ack_backlog = 0;
1979         newsk->acked_seq = skb->h.th->seq+1;
1980         newsk->fin_seq = skb->h.th->seq;
1981         newsk->copied_seq = skb->h.th->seq;
1982         newsk->state = TCP_SYN_RECV;
1983         newsk->timeout = 0;
1984         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
1985         newsk->window_seq = newsk->write_seq;
1986         newsk->rcv_ack_seq = newsk->write_seq;
1987         newsk->urg_data = 0;
1988         newsk->retransmits = 0;
1989         newsk->destroy = 0;
1990         init_timer(&newsk->timer);
1991         newsk->timer.data = (unsigned long)newsk;
1992         newsk->timer.function = &net_timer;
1993         newsk->dummy_th.source = skb->h.th->dest;
1994         newsk->dummy_th.dest = skb->h.th->source;
1995         
1996         /*
1997          *      Swap these two, they are from our point of view. 
1998          */
1999          
2000         newsk->daddr = saddr;
2001         newsk->saddr = daddr;
2002 
2003         put_sock(newsk->num,newsk);
2004         newsk->dummy_th.res1 = 0;
2005         newsk->dummy_th.doff = 6;
2006         newsk->dummy_th.fin = 0;
2007         newsk->dummy_th.syn = 0;
2008         newsk->dummy_th.rst = 0;        
2009         newsk->dummy_th.psh = 0;
2010         newsk->dummy_th.ack = 0;
2011         newsk->dummy_th.urg = 0;
2012         newsk->dummy_th.res2 = 0;
2013         newsk->acked_seq = skb->h.th->seq + 1;
2014         newsk->copied_seq = skb->h.th->seq;
2015 
2016         /*
2017          *      Grab the ttl and tos values and use them 
2018          */
2019 
2020         newsk->ip_ttl=sk->ip_ttl;
2021         newsk->ip_tos=skb->ip_hdr->tos;
2022 
2023         /*
2024          *      Use 512 or whatever user asked for 
2025          */
2026 
2027         /*
2028          *      Note use of sk->user_mss, since user has no direct access to newsk 
2029          */
2030 
2031         rt=ip_rt_route(saddr, NULL,NULL);
2032         
2033         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2034                 newsk->window_clamp = rt->rt_window;
2035         else
2036                 newsk->window_clamp = 0;
2037                 
2038         if (sk->user_mss)
2039                 newsk->mtu = sk->user_mss;
2040         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2041                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2042         else 
2043         {
2044 #ifdef CONFIG_INET_SNARL        /* Sub Nets ARe Local */
2045                 if ((saddr ^ daddr) & default_mask(saddr))
2046 #else
2047                 if ((saddr ^ daddr) & dev->pa_mask)
2048 #endif
2049                         newsk->mtu = 576 - HEADER_SIZE;
2050                 else
2051                         newsk->mtu = MAX_WINDOW;
2052         }
2053 
2054         /*
2055          *      But not bigger than device MTU 
2056          */
2057 
2058         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2059 
2060         /*
2061          *      This will min with what arrived in the packet 
2062          */
2063 
2064         tcp_options(newsk,skb->h.th);
2065 
2066         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2067         if (buff == NULL) 
2068         {
2069                 sk->err = -ENOMEM;
2070                 newsk->dead = 1;
2071                 release_sock(newsk);
2072                 kfree_skb(skb, FREE_READ);
2073                 tcp_statistics.TcpAttemptFails++;
2074                 return;
2075         }
2076   
2077         buff->len = sizeof(struct tcphdr)+4;
2078         buff->sk = newsk;
2079         buff->localroute = newsk->localroute;
2080 
2081         t1 =(struct tcphdr *) buff->data;
2082 
2083         /*
2084          *      Put in the IP header and routing stuff. 
2085          */
2086 
2087         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2088                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2089 
2090         /*
2091          *      Something went wrong. 
2092          */
2093 
2094         if (tmp < 0) 
2095         {
2096                 sk->err = tmp;
2097                 buff->free=1;
2098                 kfree_skb(buff,FREE_WRITE);
2099                 newsk->dead = 1;
2100                 release_sock(newsk);
2101                 skb->sk = sk;
2102                 kfree_skb(skb, FREE_READ);
2103                 tcp_statistics.TcpAttemptFails++;
2104                 return;
2105         }
2106 
2107         buff->len += tmp;
2108         t1 =(struct tcphdr *)((char *)t1 +tmp);
2109   
2110         memcpy(t1, skb->h.th, sizeof(*t1));
2111         buff->h.seq = newsk->write_seq;
2112         /*
2113          *      Swap the send and the receive. 
2114          */
2115         t1->dest = skb->h.th->source;
2116         t1->source = newsk->dummy_th.source;
2117         t1->seq = ntohl(newsk->write_seq++);
2118         t1->ack = 1;
2119         newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2120         newsk->sent_seq = newsk->write_seq;
2121         t1->window = ntohs(newsk->window);
2122         t1->res1 = 0;
2123         t1->res2 = 0;
2124         t1->rst = 0;
2125         t1->urg = 0;
2126         t1->psh = 0;
2127         t1->syn = 1;
2128         t1->ack_seq = ntohl(skb->h.th->seq+1);
2129         t1->doff = sizeof(*t1)/4+1;
2130         ptr =(unsigned char *)(t1+1);
2131         ptr[0] = 2;
2132         ptr[1] = 4;
2133         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2134         ptr[3] =(newsk->mtu) & 0xff;
2135 
2136         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2137         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2138 
2139         reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
2140         skb->sk = newsk;
2141 
2142         /*
2143          *      Charge the sock_buff to newsk. 
2144          */
2145          
2146         sk->rmem_alloc -= skb->mem_len;
2147         newsk->rmem_alloc += skb->mem_len;
2148         
2149         skb_queue_tail(&sk->receive_queue,skb);
2150         sk->ack_backlog++;
2151         release_sock(newsk);
2152         tcp_statistics.TcpOutSegs++;
2153 }
2154 
2155 
2156 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2157 {
2158         struct sk_buff *buff;
2159         int need_reset = 0;
2160         struct tcphdr *t1, *th;
2161         struct proto *prot;
2162         struct device *dev=NULL;
2163         int tmp;
2164 
2165         /*
2166          * We need to grab some memory, and put together a FIN, 
2167          * and then put it into the queue to be sent.
2168          */
2169         sk->inuse = 1;
2170         sk->keepopen = 1;
2171         sk->shutdown = SHUTDOWN_MASK;
2172 
2173         if (!sk->dead) 
2174                 sk->state_change(sk);
2175 
2176         /*
2177          *      We need to flush the recv. buffs. 
2178          */
2179 
2180         if (skb_peek(&sk->receive_queue) != NULL) 
2181         {
2182                 struct sk_buff *skb;
2183                 if(sk->debug)
2184                         printk("Clean rcv queue\n");
2185                 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2186                 {
2187                         /* The +1 is not needed because the FIN takes up sequence space and
2188                            is not read!!! */
2189                         if(skb->len > 0 && after(skb->h.th->seq + skb->len/* + 1 */ , sk->copied_seq))
2190                                 need_reset = 1;
2191                         kfree_skb(skb, FREE_READ);
2192                 }
2193                 if(sk->debug)
2194                         printk("Cleaned.\n");
2195         }
2196 
2197         /*
2198          *      Get rid off any half-completed packets. 
2199          */
2200          
2201         if (sk->partial) 
2202         {
2203                 tcp_send_partial(sk);
2204         }
2205 
2206         switch(sk->state) 
2207         {
2208                 case TCP_FIN_WAIT1:
2209                 case TCP_FIN_WAIT2:
2210                 case TCP_CLOSING:
2211                         /*
2212                          * These states occur when we have already closed out
2213                          * our end.  If there is no timeout, we do not do
2214                          * anything.  We may still be in the middle of sending
2215                          * the remainder of our buffer, for example...
2216                          * resetting the timer would be inappropriate.
2217                          *
2218                          * XXX if retransmit count reaches limit, is tcp_close()
2219                          * called with timeout == 1 ? if not, we need to fix that.
2220                          */
2221                         if (!timeout) {
2222                                 int timer_active;
2223 
2224                                 timer_active = del_timer(&sk->timer);
2225                                 if (timer_active)
2226                                         add_timer(&sk->timer);
2227                                 else
2228                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2229                         }
2230 #ifdef NOTDEF
2231                         /* 
2232                          *      Start a timer.
2233                          * original code was 4 * sk->rtt.  In converting to the
2234                          * new rtt representation, we can't quite use that.
2235                          * it seems to make most sense to  use the backed off value
2236                          */
2237                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2238 #endif
2239                         if (timeout) 
2240                                 tcp_time_wait(sk);
2241                         release_sock(sk);
2242                         return; /* break causes a double release - messy */
2243                 case TCP_TIME_WAIT:
2244                 case TCP_LAST_ACK:
2245                         /*
2246                          * A timeout from these states terminates the TCB.
2247                          */
2248                         if (timeout) 
2249                         {
2250                                 sk->state = TCP_CLOSE;
2251                         }
2252                         release_sock(sk);
2253                         return;
2254                 case TCP_LISTEN:
2255                         sk->state = TCP_CLOSE;
2256                         release_sock(sk);
2257                         return;
2258                 case TCP_CLOSE:
2259                         release_sock(sk);
2260                         return;
2261                 case TCP_CLOSE_WAIT:
2262                 case TCP_ESTABLISHED:
2263                 case TCP_SYN_SENT:
2264                 case TCP_SYN_RECV:
2265                         prot =(struct proto *)sk->prot;
2266                         th =(struct tcphdr *)&sk->dummy_th;
2267                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2268                         if (buff == NULL) 
2269                         {
2270                                 /* This will force it to try again later. */
2271                                 /* Or it would have if someone released the socket
2272                                    first. Anyway it might work now */
2273                                 release_sock(sk);
2274                                 if (sk->state != TCP_CLOSE_WAIT)
2275                                         sk->state = TCP_ESTABLISHED;
2276                                 reset_timer(sk, TIME_CLOSE, 100);
2277                                 return;
2278                         }
2279                         buff->sk = sk;
2280                         buff->free = 1;
2281                         buff->len = sizeof(*t1);
2282                         buff->localroute = sk->localroute;
2283                         t1 =(struct tcphdr *) buff->data;
2284         
2285                         /*
2286                          *      Put in the IP header and routing stuff. 
2287                          */
2288                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2289                                          IPPROTO_TCP, sk->opt,
2290                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2291                         if (tmp < 0) 
2292                         {
2293                                 sk->write_seq++;        /* Very important 8) */
2294                                 kfree_skb(buff,FREE_WRITE);
2295 
2296                                 /*
2297                                  * Enter FIN_WAIT1 to await completion of
2298                                  * written out data and ACK to our FIN.
2299                                  */
2300 
2301                                 if(sk->state==TCP_ESTABLISHED)
2302                                         sk->state=TCP_FIN_WAIT1;
2303                                 else
2304                                         sk->state=TCP_FIN_WAIT2;
2305                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2306                                 if(timeout)
2307                                         tcp_time_wait(sk);
2308 
2309                                 release_sock(sk);
2310                                 return;
2311                         }
2312 
2313                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2314                         buff->len += tmp;
2315                         buff->dev = dev;
2316                         memcpy(t1, th, sizeof(*t1));
2317                         t1->seq = ntohl(sk->write_seq);
2318                         sk->write_seq++;
2319                         buff->h.seq = sk->write_seq;
2320                         t1->ack = 1;
2321         
2322                         /* 
2323                          *      Ack everything immediately from now on. 
2324                          */
2325 
2326                         sk->delay_acks = 0;
2327                         t1->ack_seq = ntohl(sk->acked_seq);
2328                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2329                         t1->fin = 1;
2330                         t1->rst = need_reset;
2331                         t1->doff = sizeof(*t1)/4;
2332                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2333 
2334                         tcp_statistics.TcpOutSegs++;
2335         
2336                         if (skb_peek(&sk->write_queue) == NULL) 
2337                         {
2338                                 sk->sent_seq = sk->write_seq;
2339                                 prot->queue_xmit(sk, dev, buff, 0);
2340                         } 
2341                         else 
2342                         {
2343                                 reset_timer(sk, TIME_WRITE, sk->rto);
2344                                 if (buff->next != NULL) 
2345                                 {
2346                                         printk("tcp_close: next != NULL\n");
2347                                         skb_unlink(buff);
2348                                 }
2349                                 skb_queue_tail(&sk->write_queue, buff);
2350                         }
2351 
2352                         /*
2353                          * If established (normal close), enter FIN_WAIT1.
2354                          * If in CLOSE_WAIT, enter LAST_ACK
2355                          * If in CLOSING, remain in CLOSING
2356                          * otherwise enter FIN_WAIT2
2357                          */
2358 
2359                         if (sk->state == TCP_ESTABLISHED)
2360                             sk->state = TCP_FIN_WAIT1;
2361                         else if (sk->state == TCP_CLOSE_WAIT)
2362                             sk->state = TCP_LAST_ACK;
2363                         else if (sk->state != TCP_CLOSING)
2364                             sk->state = TCP_FIN_WAIT2;
2365         }
2366         release_sock(sk);
2367 }
2368 
2369 
2370 /*
2371  * This routine takes stuff off of the write queue,
2372  * and puts it in the xmit queue.
2373  */
2374 static void
2375 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2376 {
2377         struct sk_buff *skb;
2378 
2379         /*
2380          *      The bytes will have to remain here. In time closedown will
2381          *      empty the write queue and all will be happy 
2382          */
2383 
2384         if(sk->zapped)
2385                 return;
2386 
2387         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2388                 before(skb->h.seq, sk->window_seq + 1) &&
2389                 (sk->retransmits == 0 ||
2390                  sk->timeout != TIME_WRITE ||
2391                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2392                 && sk->packets_out < sk->cong_window) 
2393         {
2394                 IS_SKB(skb);
2395                 skb_unlink(skb);
2396                 /* See if we really need to send the packet. */
2397                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2398                 {
2399                         sk->retransmits = 0;
2400                         kfree_skb(skb, FREE_WRITE);
2401                         if (!sk->dead) 
2402                                 sk->write_space(sk);
2403                 } 
2404                 else
2405                 {
2406                         struct tcphdr *th;
2407                         struct iphdr *iph;
2408                         int size;
2409 /*
2410  * put in the ack seq and window at this point rather than earlier,
2411  * in order to keep them monotonic.  We really want to avoid taking
2412  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2413  * Ack and window will in general have changed since this packet was put
2414  * on the write queue.
2415  */
2416                         iph = (struct iphdr *)(skb->data +
2417                                                skb->dev->hard_header_len);
2418                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2419                         size = skb->len - (((unsigned char *) th) - skb->data);
2420                         
2421                         th->ack_seq = ntohl(sk->acked_seq);
2422                         th->window = ntohs(tcp_select_window(sk));
2423 
2424                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2425 
2426                         sk->sent_seq = skb->h.seq;
2427                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2428                 }
2429         }
2430 }
2431 
2432 
2433 /*
2434  *      This routine sorts the send list, and resets the
2435  *      sk->send_head and sk->send_tail pointers.
2436  */
2437 
2438 static void sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2439 {
2440         struct sk_buff *list = NULL;
2441         struct sk_buff *skb,*skb2,*skb3;
2442 
2443         for (skb = sk->send_head; skb != NULL; skb = skb2) 
2444         {
2445                 skb2 = skb->link3;
2446                 if (list == NULL || before (skb2->h.seq, list->h.seq)) 
2447                 {
2448                         skb->link3 = list;
2449                         sk->send_tail = skb;
2450                         list = skb;
2451                 }
2452                 else
2453                 {
2454                         for (skb3 = list; ; skb3 = skb3->link3) 
2455                         {
2456                                 if (skb3->link3 == NULL ||
2457                                     before(skb->h.seq, skb3->link3->h.seq))
2458                                 {
2459                                         skb->link3 = skb3->link3;
2460                                         skb3->link3 = skb;
2461                                         if (skb->link3 == NULL) 
2462                                                 sk->send_tail = skb;
2463                                         break;
2464                                 }
2465                         }
2466                 }
2467         }
2468         sk->send_head = list;
2469 }
2470   
2471 
2472 /*
2473  *      This routine deals with incoming acks, but not outgoing ones.
      *
      *      It updates the sender side of the socket from the ack number and
      *      window carried in a received header: the usable send window
      *      (window_seq), the congestion window, the round trip estimate and
      *      rto, the retransmit list (send_head/send_tail), the timers, and
      *      the closing-state transitions that wait for our FIN to be acked.
      *
      *      sk    - socket the segment belongs to.
      *      th    - TCP header of the received segment; ack_seq, window and
      *              doff are read from it.
      *      saddr - not referenced in this routine.
      *      len   - compared with th->doff*4; a difference sets flag bit 1.
      *
      *      Returns 0 when the ack is beyond sk->sent_seq, or when it is
      *      older than sk->rcv_ack_seq and the socket is in neither
      *      TCP_ESTABLISHED nor TCP_CLOSE_WAIT.  Returns 1 in every other
      *      case, including a zapped socket.
2474  */
2475 
2476 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
2477 {
2478         unsigned long ack;
2479         int flag = 0;
2480 
2481         /* 
2482          * 1 - there was data in packet as well as ack or new data is sent or 
2483          *     in shutdown state
2484          * 2 - data from retransmit queue was acked and removed
2485          * 4 - window shrunk or data from retransmit queue was acked and removed
2486          */
2487 
2488         if(sk->zapped)
2489                 return(1);      /* Dead, can't ack any more so why bother */
2490 
2491         ack = ntohl(th->ack_seq);
             /* Track the largest window the peer has ever offered; mss follows it. */
2492         if (ntohs(th->window) > sk->max_window) 
2493         {
2494                 sk->max_window = ntohs(th->window);
2495 #ifdef CONFIG_INET_PCTCP
2496                 sk->mss = min(sk->max_window>>1, sk->mtu);
2497 #else
2498                 sk->mss = min(sk->max_window, sk->mtu);
2499 #endif  
2500         }
2501 
2502         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2503                 sk->retransmits = 0;
2504 
2505 #if 0
2506 /*
2507  *      Not quite clear why the +1 and -1 here, and why not +1 in next line 
2508  */
2509  
2510         if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) 
2511 #else   
2512         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2513 #endif  
2514         {
                     /*
                      * The ack is outside [rcv_ack_seq, sent_seq].  One that is
                      * ahead of what we sent is always refused; an old one is
                      * tolerated only in ESTABLISHED or CLOSE_WAIT.
                      */
2515                 if(sk->debug)
2516                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2517                 if (after(ack, sk->sent_seq) ||
2518                    (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2519                 {
2520                         return(0);
2521                 }
2522                 if (sk->keepopen) 
2523                 {
2524                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2525                 }
2526                 return(1);
2527         }
2528 
             /* Anything beyond the header length counts as "data in packet". */
2529         if (len != th->doff*4) 
2530                 flag |= 1;
2531 
2532         /* See if our window has been shrunk. */
2533 
2534         if (after(sk->window_seq, ack+ntohs(th->window))) 
2535         {
2536                 /*
2537                  * We may need to move packets from the send queue
2538                  * to the write queue, if the window has been shrunk on us.
2539                  * The RFC says you are not allowed to shrink your window
2540                  * like this, but if the other end does, you must be able
2541                  * to deal with it.
2542                  */
2543                 struct sk_buff *skb;
2544                 struct sk_buff *skb2;
2545                 struct sk_buff *wskb = NULL;
2546         
                     /* Detach the whole send list; it is rebuilt in the loop below. */
2547                 skb2 = sk->send_head;
2548                 sk->send_head = NULL;
2549                 sk->send_tail = NULL;
2550         
2551                 flag |= 4;
2552         
2553                 sk->window_seq = ack + ntohs(th->window);
2554                 cli();
2555                 while (skb2 != NULL) 
2556                 {
2557                         skb = skb2;
2558                         skb2 = skb->link3;
2559                         skb->link3 = NULL;
2560                         if (after(skb->h.seq, sk->window_seq)) 
2561                         {
                                     /* No longer fits the window: back to the write queue. */
2562                                 if (sk->packets_out > 0) 
2563                                         sk->packets_out--;
2564                                 /* We may need to remove this from the dev send list. */
2565                                 if (skb->next != NULL) 
2566                                 {
2567                                         skb_unlink(skb);                                
2568                                 }
2569                                 /* Now add it to the write_queue. */
2570                                 if (wskb == NULL)
2571                                         skb_queue_head(&sk->write_queue,skb);
2572                                 else
2573                                         skb_append(wskb,skb);
2574                                 wskb = skb;
2575                         } 
2576                         else 
2577                         {
                                     /* Still inside the window: keep it on the send list. */
2578                                 if (sk->send_head == NULL) 
2579                                 {
2580                                         sk->send_head = skb;
2581                                         sk->send_tail = skb;
2582                                 }
2583                                 else
2584                                 {
2585                                         sk->send_tail->link3 = skb;
2586                                         sk->send_tail = skb;
2587                                 }
2588                                 skb->link3 = NULL;
2589                         }
2590                 }
2591                 sti();
2592         }
2593 
             /* Force head, tail and packets_out to agree when the send list is empty. */
2594         if (sk->send_tail == NULL || sk->send_head == NULL) 
2595         {
2596                 sk->send_head = NULL;
2597                 sk->send_tail = NULL;
2598                 sk->packets_out= 0;
2599         }
2600 
2601         sk->window_seq = ack + ntohs(th->window);
2602 
2603         /* We don't want too many packets out there. */
2604         if (sk->timeout == TIME_WRITE && 
2605                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2606         {
2607 /* 
2608  * This is Jacobson's slow start and congestion avoidance. 
2609  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2610  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2611  * counter and increment it once every cwnd times.  It's possible
2612  * that this should be done only if sk->retransmits == 0.  I'm
2613  * interpreting "new data is acked" as including data that has
2614  * been retransmitted but is just now being acked.
2615  */
2616                 if (sk->cong_window < sk->ssthresh)  
2617                   /* 
2618                    *    In "safe" area, increase
2619                    */
2620                         sk->cong_window++;
2621                 else 
2622                 {
2623                   /*
2624                    *    In dangerous area, increase slowly.  In theory this is
2625                    *    sk->cong_window += 1 / sk->cong_window
2626                    */
2627                         if (sk->cong_count >= sk->cong_window) 
2628                         {
2629                                 sk->cong_window++;
2630                                 sk->cong_count = 0;
2631                         }
2632                         else 
2633                                 sk->cong_count++;
2634                 }
2635         }
2636 
2637         sk->rcv_ack_seq = ack;
2638 
2639         /*
2640          * if this ack opens up a zero window, clear backoff.  It was
2641          * being used to time the probes, and is probably far higher than
2642          * it needs to be for normal retransmission.
2643          */
2644 
2645         if (sk->timeout == TIME_PROBE0) 
2646         {
2647                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2648                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2649                 {
2650                         sk->retransmits = 0;
2651                         sk->backoff = 0;
2652                   /*
2653                    *    Recompute rto from rtt.  this eliminates any backoff.
2654                    */
2655 
2656                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2657                         if (sk->rto > 120*HZ)
2658                                 sk->rto = 120*HZ;
2659                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2660                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2661                                                    .2 of a second is going to need huge windows (SIGH) */
2662                                 sk->rto = 20;
2663                 }
2664         }
2665 
2666   /* 
2667    *    See if we can take anything off of the retransmit queue.
2668    */
2669    
2670         while(sk->send_head != NULL) 
2671         {
2672                 /* Check for a bug. */
2673                 if (sk->send_head->link3 &&
2674                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2675                 {
2676                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2677                         sort_send(sk);
2678                 }
2679 
                     /* Head frame is fully covered by this ack: retire it. */
2680                 if (before(sk->send_head->h.seq, ack+1)) 
2681                 {
2682                         struct sk_buff *oskb;   
2683                         if (sk->retransmits) 
2684                         {       
2685                                 /*
2686                                  *      We were retransmitting.  don't count this in RTT est 
2687                                  */
2688                                 flag |= 2;
2689 
2690                                 /*
2691                                  * even though we've gotten an ack, we're still
2692                                  * retransmitting as long as we're sending from
2693                                  * the retransmit queue.  Keeping retransmits non-zero
2694                                  * prevents us from getting new data interspersed with
2695                                  * retransmissions.
2696                                  */
2697 
2698                                 if (sk->send_head->link3)
2699                                         sk->retransmits = 1;
2700                                 else
2701                                         sk->retransmits = 0;
2702                         }
2703                         /*
2704                          * Note that we only reset backoff and rto in the
2705                          * rtt recomputation code.  And that doesn't happen
2706                          * if there were retransmissions in effect.  So the
2707                          * first new packet after the retransmissions is
2708                          * sent with the backoff still in effect.  Not until
2709                          * we get an ack from a non-retransmitted packet do
2710                          * we reset the backoff and rto.  This allows us to deal
2711                          * with a situation where the network delay has increased
2712                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2713                          */
2714 
2715                         /*
2716                          *      We have one less packet out there. 
2717                          */
2718                          
2719                         if (sk->packets_out > 0) 
2720                                 sk->packets_out --;
2721                         /* 
2722                          *      Wake up the process, it can probably write more. 
2723                          */
2724                         if (!sk->dead) 
2725                                 sk->write_space(sk);
                             /* Used for the RTT sample; reloaded under cli() further down. */
2726                         oskb = sk->send_head;
2727 
                             /* Bit 2 is set once any retransmitted frame was retired in this call. */
2728                         if (!(flag&2)) 
2729                         {
2730                                 long m;
2731         
2732                                 /*
2733                                  *      The following amusing code comes from Jacobson's
2734                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2735                                  *      are scaled versions of rtt and mean deviation.
2736                                  *      This is designed to be as fast as possible 
2737                                  *      m stands for "measurement".
2738                                  */
2739         
2740                                 m = jiffies - oskb->when;  /* RTT */
2741                                 if(m<=0)
2742                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2743                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2744                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2745                                 if (m < 0)
2746                                         m = -m;         /* m is now abs(error) */
2747                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2748                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2749         
2750                                 /*
2751                                  *      Now update timeout.  Note that this removes any backoff.
2752                                  */
2753                          
2754                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2755                                 if (sk->rto > 120*HZ)
2756                                         sk->rto = 120*HZ;
2757                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2758                                         sk->rto = 20;
2759                                 sk->backoff = 0;
2760                         }
2761                         flag |= (2|4);
                             /* Unlink the head from the send list with interrupts off. */
2762                         cli();
2763                         oskb = sk->send_head;
2764                         IS_SKB(oskb);
2765                         sk->send_head = oskb->link3;
2766                         if (sk->send_head == NULL) 
2767                         {
2768                                 sk->send_tail = NULL;
2769                         }
2770 
2771                 /*
2772                  *      We may need to remove this from the dev send list. 
2773                  */
2774 
2775                         if (oskb->next)
2776                                 skb_unlink(oskb);
2777                         sti();
2778                         kfree_skb(oskb, FREE_WRITE); /* write. */
2779                         if (!sk->dead) 
2780                                 sk->write_space(sk);
2781                 }
2782                 else
2783                 {
2784                         break;
2785                 }
2786         }
2787 
2788         /*
2789          * Maybe we can take some stuff off of the write queue,
2790          * and put it onto the xmit queue.
2791          */
2792         if (skb_peek(&sk->write_queue) != NULL) 
2793         {
                     /* Same admission test that tcp_write_xmit() applies to the head frame. */
2794                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2795                         (sk->retransmits == 0 || 
2796                          sk->timeout != TIME_WRITE ||
2797                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2798                         && sk->packets_out < sk->cong_window) 
2799                 {
2800                         flag |= 1;
2801                         tcp_write_xmit(sk);
2802                 }
                     /* Head frame is beyond the window and nothing is in flight: start probing. */
2803                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2804                         sk->send_head == NULL &&
2805                         sk->ack_backlog == 0 &&
2806                         sk->state != TCP_TIME_WAIT) 
2807                 {
2808                         reset_timer(sk, TIME_PROBE0, sk->rto);
2809                 }               
2810         }
2811         else
2812         {
2813                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2814                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2815                 {
                             /*
                              * NOTE(review): the test above already requires
                              * !sk->keepopen, so the TIME_KEEPOPEN arm below can
                              * never be taken from here -- confirm which of the
                              * two was intended.
                              */
2816                         if (!sk->dead)
2817                                 sk->write_space(sk);
2818                         if (sk->keepopen)
2819                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2820                         else
2821                                 delete_timer(sk);
2822                 }
2823                 else
2824                 {
                             /*
                              * NOTE(review): this compares the connection state
                              * with the keepopen flag, which looks suspicious --
                              * verify the intended condition.
                              */
2825                         if (sk->state != (unsigned char) sk->keepopen) 
2826                         {
2827                                 reset_timer(sk, TIME_WRITE, sk->rto);
2828                         }
2829                         if (sk->state == TCP_TIME_WAIT) 
2830                         {
2831                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2832                         }       
2833                 }
2834         }
2835 
             /* Everything is drained: a held-back partial frame can go out now. */
2836         if (sk->packets_out == 0 && sk->partial != NULL &&
2837                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2838         {
2839                 flag |= 1;
2840                 tcp_send_partial(sk);
2841         }
2842 
2843         /*
2844          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2845          * we are now waiting for an acknowledge to our FIN.  The other end is
2846          * already in TIME_WAIT.
2847          *
2848          * Move to TCP_CLOSE on success.
2849          */
2850 
2851         if (sk->state == TCP_LAST_ACK) 
2852         {
2853                 if (!sk->dead)
2854                         sk->state_change(sk);
2855                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2856                 {
2857                         flag |= 1;
2858                         sk->state = TCP_CLOSE;
2859                         sk->shutdown = SHUTDOWN_MASK;
2860                 }
2861         }
2862 
2863         /*
2864          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2865          *
2866          * Move to FIN_WAIT2 to await a FIN from the other end.
2867          */
2868 
2869         if (sk->state == TCP_FIN_WAIT1) 
2870         {
2871 
2872                 if (!sk->dead) 
2873                         sk->state_change(sk);
2874                 if (sk->rcv_ack_seq == sk->write_seq) 
2875                 {
2876                         flag |= 1;
                             /*
                              * NOTE(review): the comment above speaks of FIN_WAIT2
                              * only, but tcp_time_wait() is called when acked_seq
                              * differs from fin_seq and FIN_WAIT2 is entered when
                              * they are equal -- confirm the sense of this test.
                              */
2877                         if (sk->acked_seq != sk->fin_seq) 
2878                         {
2879                                 tcp_time_wait(sk);
2880                         }
2881                         else
2882                         {
2883                                 sk->shutdown = SHUTDOWN_MASK;
2884                                 sk->state = TCP_FIN_WAIT2;
2885                         }
2886                 }
2887         }
2888 
2889         /*
2890          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2891          *
2892          *      Move to TIME_WAIT
2893          */
2894 
2895         if (sk->state == TCP_CLOSING) 
2896         {
2897 
2898                 if (!sk->dead) 
2899                         sk->state_change(sk);
2900                 if (sk->rcv_ack_seq == sk->write_seq) 
2901                 {
2902                         flag |= 1;
2903                         tcp_time_wait(sk);
2904                 }
2905         }
2906 
2907         /*
2908          * I make no guarantees about the first clause in the following
2909          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2910          * what conditions "!flag" would be true.  However I think the rest
2911          * of the conditions would prevent that from causing any
2912          * unnecessary retransmission. 
2913          *   Clearly if the first packet has expired it should be 
2914          * retransmitted.  The other alternative, "flag&2 && retransmits", is
2915          * harder to explain:  You have to look carefully at how and when the
2916          * timer is set and with what timeout.  The most recent transmission always
2917          * sets the timer.  So in general if the most recent thing has timed
2918          * out, everything before it has as well.  So we want to go ahead and
2919          * retransmit some more.  If we didn't explicitly test for this
2920          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2921          * would not be true.  If you look at the pattern of timing, you can
2922          * show that rto is increased fast enough that the next packet would
2923          * almost never be retransmitted immediately.  Then you'd end up
2924          * waiting for a timeout to send each packet on the retransmission
2925          * queue.  With my implementation of the Karn sampling algorithm,
2926          * the timeout would double each time.  The net result is that it would
2927          * take a hideous amount of time to recover from a single dropped packet.
2928          * It's possible that there should also be a test for TIME_WRITE, but
2929          * I think as long as "send_head != NULL" and "retransmit" is on, we've
2930          * got to be in real retransmission mode.
2931          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2932          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2933          * As long as no further losses occur, this seems reasonable.
2934          */
2935         
2936         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2937                (((flag&2) && sk->retransmits) ||
2938                (sk->send_head->when + sk->rto < jiffies))) 
2939         {
2940                 ip_do_retransmit(sk, 1);
2941                 reset_timer(sk, TIME_WRITE, sk->rto);
2942         }
2943 
2944         return(1);
2945 }
2946 
2947 
2948 /*
2949  *      This routine handles the data.  If there is room in the buffer,
2950  *      it will have already been moved into it.  If there is no
2951  *      room, then we will just have to discard the packet.
2952  */
2953 
2954 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
2955          unsigned long saddr, unsigned short len)
2956 {
2957         struct sk_buff *skb1, *skb2;
2958         struct tcphdr *th;
2959         int dup_dumped=0;
2960         unsigned long new_seq;
2961 
2962         th = skb->h.th;
2963         skb->len = len -(th->doff*4);
2964 
2965         /* The bytes in the receive read/assembly queue has increased. Needed for the
2966            low memory discard algorithm */
2967            
2968         sk->bytes_rcv += skb->len;
2969         
2970         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
2971         {
2972                 /* 
2973                  *      Don't want to keep passing ack's back and forth. 
2974                  *      (someone sent us dataless, boring frame)
2975                  */
2976                 if (!th->ack)
2977                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
2978                 kfree_skb(skb, FREE_READ);
2979                 return(0);
2980         }
2981         
2982         /*
2983          *      We no longer have anyone receiving data on this connection.
2984          */
2985 
2986         if(sk->shutdown & RCV_SHUTDOWN)
2987         {
2988                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
2989                 
2990                 if(after(new_seq,sk->copied_seq+1))     /* If the right edge of this frame is after the last copied byte
2991                                                            then it contains data we will never touch. We send an RST to 
2992                                                            ensure the far end knows it never got to the application */
2993                 {
2994                         sk->acked_seq = new_seq + th->fin;
2995                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2996                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2997                         tcp_statistics.TcpEstabResets++;
2998                         sk->state = TCP_CLOSE;
2999                         sk->err = EPIPE;
3000                         sk->shutdown = SHUTDOWN_MASK;
3001                         kfree_skb(skb, FREE_READ);
3002                         if (!sk->dead)
3003                                 sk->state_change(sk);
3004                         return(0);
3005                 }
3006 #if 0           
3007                 /* Discard the frame here - we've already proved its a duplicate */
3008                 
3009                 kfree_skb(skb, FREE_READ);
3010                 return(0);                              
3011 #endif          
3012         }
3013         /*
3014          *      Now we have to walk the chain, and figure out where this one
3015          *      goes into it.  This is set up so that the last packet we received
3016          *      will be the first one we look at, that way if everything comes
3017          *      in order, there will be no performance loss, and if they come
3018          *      out of order we will be able to fit things in nicely.
3019          */
3020 
3021         /* 
3022          *      This should start at the last one, and then go around forwards.
3023          */
3024 
3025         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3026         {
3027                 skb_queue_head(&sk->receive_queue,skb);
3028                 skb1= NULL;
3029         } 
3030         else
3031         {
3032                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3033                 {
3034                         if(sk->debug)
3035                         {
3036                                 printk("skb1=%p :", skb1);
3037                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3038                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3039                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3040                                                 sk->acked_seq);
3041                         }
3042                         
3043                         /*
3044                          *      Optimisation: Duplicate frame or extension of previous frame from
3045                          *      same sequence point (lost ack case).
3046                          *      The frame contains duplicate data or replaces a previous frame
3047                          *      discard the previous frame (safe as sk->inuse is set) and put
3048                          *      the new one in its place.
3049                          */
3050                          
3051                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3052                         {
3053                                 skb_append(skb1,skb);
3054                                 skb_unlink(skb1);
3055                                 kfree_skb(skb1,FREE_READ);
3056                                 dup_dumped=1;
3057                                 skb1=NULL;
3058                                 break;
3059                         }
3060                         
3061                         /*
3062                          *      Found where it fits
3063                          */
3064                          
3065                         if (after(th->seq+1, skb1->h.th->seq))
3066                         {
3067                                 skb_append(skb1,skb);
3068                                 break;
3069                         }
3070                         
3071                         /*
3072                          *      See if we've hit the start. If so insert.
3073                          */
3074                         if (skb1 == skb_peek(&sk->receive_queue))
3075                         {
3076                                 skb_queue_head(&sk->receive_queue, skb);
3077                                 break;
3078                         }
3079                 }
3080         }
3081 
3082         /*
3083          *      Figure out what the ack value for this frame is
3084          */
3085          
3086         th->ack_seq = th->seq + skb->len;
3087         if (th->syn) 
3088                 th->ack_seq++;
3089         if (th->fin)
3090                 th->ack_seq++;
3091 
3092         if (before(sk->acked_seq, sk->copied_seq)) 
3093         {
3094                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3095                 sk->acked_seq = sk->copied_seq;
3096         }
3097 
3098         /*
3099          *      Now figure out if we can ack anything.
3100          */
3101 
3102         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3103         {
3104                 if (before(th->seq, sk->acked_seq+1)) 
3105                 {
3106                         int newwindow;
3107 
3108                         if (after(th->ack_seq, sk->acked_seq)) 
3109                         {
3110                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3111                                 if (newwindow < 0)
3112                                         newwindow = 0;  
3113                                 sk->window = newwindow;
3114                                 sk->acked_seq = th->ack_seq;
3115                         }
3116                         skb->acked = 1;
3117 
3118                         /* 
3119                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3120                          */
3121 
3122                         if (skb->h.th->fin) 
3123                         {
3124                                 if (!sk->dead) 
3125                                         sk->state_change(sk);
3126                                 sk->shutdown |= RCV_SHUTDOWN;
3127                         }
3128           
3129                         for(skb2 = skb->next;
3130                             skb2 != (struct sk_buff *)&sk->receive_queue;
3131                             skb2 = skb2->next) 
3132                         {
3133                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3134                                 {
3135                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3136                                         {
3137                                                 newwindow = sk->window -
3138                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3139                                                 if (newwindow < 0)
3140                                                         newwindow = 0;  
3141                                                 sk->window = newwindow;
3142                                                 sk->acked_seq = skb2->h.th->ack_seq;
3143                                         }
3144                                         skb2->acked = 1;
3145                                         /*
3146                                          *      When we ack the fin, we turn on
3147                                          *      the RCV_SHUTDOWN flag.
3148                                          */
3149                                         if (skb2->h.th->fin) 
3150                                         {
3151                                                 sk->shutdown |= RCV_SHUTDOWN;
3152                                                 if (!sk->dead)
3153                                                         sk->state_change(sk);
3154                                         }
3155 
3156                                         /*
3157                                          *      Force an immediate ack.
3158                                          */
3159                                          
3160                                         sk->ack_backlog = sk->max_ack_backlog;
3161                                 }
3162                                 else
3163                                 {
3164                                         break;
3165                                 }
3166                         }
3167 
3168                         /*
3169                          *      This also takes care of updating the window.
3170                          *      This if statement needs to be simplified.
3171                          */
3172                         if (!sk->delay_acks ||
3173                             sk->ack_backlog >= sk->max_ack_backlog || 
3174                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3175         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3176                         }
3177                         else 
3178                         {
3179                                 sk->ack_backlog++;
3180                                 if(sk->debug)
3181                                         printk("Ack queued.\n");
3182                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3183                         }
3184                 }
3185         }
3186 
3187         /*
3188          *      If we've missed a packet, send an ack.
3189          *      Also start a timer to send another.
3190          */
3191          
3192         if (!skb->acked) 
3193         {
3194         
3195         /*
3196          *      This is important.  If we don't have much room left,
3197          *      we need to throw out a few packets so we have a good
3198          *      window.  Note that mtu is used, not mss, because mss is really
3199          *      for the send side.  He could be sending us stuff as large as mtu.
3200          */
3201                  
3202                 while (sk->prot->rspace(sk) < sk->mtu) 
3203                 {
3204                         skb1 = skb_peek(&sk->receive_queue);
3205                         if (skb1 == NULL) 
3206                         {
3207                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3208                                 break;
3209                         }
3210 
3211                         /*
3212                          *      Don't throw out something that has been acked. 
3213                          */
3214                  
3215                         if (skb1->acked) 
3216                         {
3217                                 break;
3218                         }
3219                 
3220                         skb_unlink(skb1);
3221                         kfree_skb(skb1, FREE_READ);
3222                 }
3223                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3224                 sk->ack_backlog++;
3225                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3226         }
3227         else
3228         {
3229                 /* We missed a packet.  Send an ack to try to resync things. */
3230                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3231         }
3232 
3233         /*
3234          *      Now tell the user we may have some data. 
3235          */
3236          
3237         if (!sk->dead) 
3238         {
3239                 if(sk->debug)
3240                         printk("Data wakeup.\n");
3241                 sk->data_ready(sk,0);
3242         } 
3243         return(0);
3244 }
3245 
3246 
3247 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3248 {
3249         unsigned long ptr = ntohs(th->urg_ptr);
3250 
3251         if (ptr)
3252                 ptr--;
3253         ptr += th->seq;
3254 
3255         /* ignore urgent data that we've already seen and read */
3256         if (after(sk->copied_seq+1, ptr))
3257                 return;
3258 
3259         /* do we already have a newer (or duplicate) urgent pointer? */
3260         if (sk->urg_data && !after(ptr, sk->urg_seq))
3261                 return;
3262 
3263         /* tell the world about our new urgent pointer */
3264         if (sk->proc != 0) {
3265                 if (sk->proc > 0) {
3266                         kill_proc(sk->proc, SIGURG, 1);
3267                 } else {
3268                         kill_pg(-sk->proc, SIGURG, 1);
3269                 }
3270         }
3271         sk->urg_data = URG_NOTYET;
3272         sk->urg_seq = ptr;
3273 }
3274 
3275 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3276         unsigned long saddr, unsigned long len)
3277 {
3278         unsigned long ptr;
3279 
3280         /* check if we get a new urgent pointer */
3281         if (th->urg)
3282                 tcp_check_urg(sk,th);
3283 
3284         /* do we wait for any urgent data? */
3285         if (sk->urg_data != URG_NOTYET)
3286                 return 0;
3287 
3288         /* is the urgent pointer pointing into this packet? */
3289         ptr = sk->urg_seq - th->seq + th->doff*4;
3290         if (ptr >= len)
3291                 return 0;
3292 
3293         /* ok, got the correct packet, update info */
3294         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3295         if (!sk->dead)
3296                 sk->data_ready(sk,0);
3297         return 0;
3298 }
3299 
3300 
3301 /*
3302  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3303  *
3304  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3305  *  (and thence onto LAST-ACK and finally, CLOSED, we never enter
3306  *  TIME-WAIT)
3307  *
3308  *  If we are in FINWAIT-1, a received FIN indicates simultanious
3309  *  close and we go into CLOSING (and later onto TIME-WAIT)
3310  *
3311  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3312  *
3313  */
3314  
3315 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3316          unsigned long saddr, struct device *dev)
3317 {
3318         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3319 
3320         if (!sk->dead) 
3321         {
3322                 sk->state_change(sk);
3323         }
3324 
3325         switch(sk->state) 
3326         {
3327                 case TCP_SYN_RECV:
3328                 case TCP_SYN_SENT:
3329                 case TCP_ESTABLISHED:
3330                         /*
3331                          * move to CLOSE_WAIT, tcp_data() already handled
3332                          * sending the ack.
3333                          */
3334                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3335                         /*sk->fin_seq = th->seq+1;*/
3336                         tcp_statistics.TcpCurrEstab--;
3337                         sk->state = TCP_CLOSE_WAIT;
3338                         if (th->rst)
3339                                 sk->shutdown = SHUTDOWN_MASK;
3340                         break;
3341 
3342                 case TCP_CLOSE_WAIT:
3343                 case TCP_CLOSING:
3344                         /*
3345                          * received a retransmission of the FIN, do
3346                          * nothing.
3347                          */
3348                         break;
3349                 case TCP_TIME_WAIT:
3350                         /*
3351                          * received a retransmission of the FIN,
3352                          * restart the TIME_WAIT timer.
3353                          */
3354                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3355                         return(0);
3356                 case TCP_FIN_WAIT1:
3357                         /*
3358                          * This case occurs when a simultanious close
3359                          * happens, we must ack the received FIN and
3360                          * enter the CLOSING state.
3361                          *
3362                          * XXX timeout not set properly
3363                          */
3364 
3365                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3366                         /*sk->fin_seq = th->seq+1;*/
3367                         sk->state = TCP_CLOSING;
3368                         break;
3369                 case TCP_FIN_WAIT2:
3370                         /*
3371                          * received a FIN -- send ACK and enter TIME_WAIT
3372                          */
3373                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3374                         /*sk->fin_seq = th->seq+1;*/
3375                         sk->state = TCP_TIME_WAIT;
3376                         break;
3377                 case TCP_CLOSE:
3378                         /*
3379                          * already in CLOSE
3380                          */
3381                         break;
3382                 default:
3383                         sk->state = TCP_LAST_ACK;
3384         
3385                         /* Start the timers. */
3386                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3387                         return(0);
3388         }
3389         sk->ack_backlog++;
3390 
3391         return(0);
3392 }
3393 
3394 
3395 /* This will accept the next outstanding connection.
      *
      * sk must be in TCP_LISTEN.  One buffer is taken off sk->receive_queue
      * and the socket it points at (skb->sk) is returned to the caller; the
      * buffer itself is freed and sk->ack_backlog is decremented.
      *
      * If the queue is empty the call sleeps on sk->sleep until woken,
      * unless O_NONBLOCK is set in flags.
      *
      * Returns the new socket, or NULL with sk->err set to:
      *   EINVAL       sk is not listening
      *   EAGAIN       nothing queued and O_NONBLOCK was requested
      *   ERESTARTSYS  an unblocked signal is pending after the sleep
      * (sk->err receives the positive errno value, as written below.)
      *
      * NOTE(review): the queued buffers are presumably the connection
      * requests placed there by tcp_conn_request -- confirm.
      */
3396 static struct sock *
3397 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3398 {
3399         struct sock *newsk;
3400         struct sk_buff *skb;
3401   
3402   /*
3403    * We need to make sure that this socket is listening,
3404    * and that it has something pending.
3405    */
3406 
3407         if (sk->state != TCP_LISTEN) 
3408         {
3409                 sk->err = EINVAL;
3410                 return(NULL); 
3411         }
3412 
3413         /* Avoid the race. */
              /*
               * Interrupts stay disabled from here until one of the sti()
               * calls below, covering the in-use marking and the queue test.
               */
3414         cli();
3415         sk->inuse = 1;
3416 
3417         while((skb = skb_dequeue(&sk->receive_queue)) == NULL) 
3418         {
3419                 if (flags & O_NONBLOCK) 
3420                 {
3421                         sti();
3422                         release_sock(sk);
3423                         sk->err = EAGAIN;
3424                         return(NULL);
3425                 }
3426 
                      /*
                       * Give the socket up before sleeping; it is re-marked
                       * in use below once we are woken without a signal.
                       */
3427                 release_sock(sk);
3428                 interruptible_sleep_on(sk->sleep);
3429                 if (current->signal & ~current->blocked) 
3430                 {
                              /* Socket was already released above, so only sti() here. */
3431                         sti();
3432                         sk->err = ERESTARTSYS;
3433                         return(NULL);
3434                 }
3435                 sk->inuse = 1;
3436         }
3437         sti();
3438 
3439         /*
3440          *      Now all we need to do is return skb->sk. 
3441          */
3442 
3443         newsk = skb->sk;
3444 
3445         kfree_skb(skb, FREE_READ);
3446         sk->ack_backlog--;
3447         release_sock(sk);
3448         return(newsk);
3449 }
3450 
3451 
3452 /*
3453  *      This will initiate an outgoing connection. 
      *
      *      sk must be in TCP_CLOSE.  usin/addr_len give the destination.
      *      A SYN segment carrying a 4-byte option (kind 2, length 4, value
      *      sk->mtu) is built and handed to sk->prot->queue_xmit(); the socket
      *      is moved to TCP_SYN_SENT and the write timer is armed with
      *      TCP_TIMEOUT_INIT.  The function does not wait for a reply.
      *
      *      Returns 0 once the SYN has been queued, otherwise:
      *        -EISCONN       socket is not in TCP_CLOSE
      *        -EINVAL        addr_len is less than 8
      *        -EAFNOSUPPORT  sin_family is non-zero and not AF_INET
      *        -ENETUNREACH   destination is a broadcast address, or
      *                       build_header() failed
      *        -EBUSY         destination address/port equal the socket's own
      *        -ENOMEM        no buffer could be allocated for the SYN
3454  */
3455  
3456 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3457 {
3458         struct sk_buff *buff;
3459         struct device *dev=NULL;
3460         unsigned char *ptr;
3461         int tmp;
3462         struct tcphdr *t1;
3463         struct rtable *rt;
3464 
3465         if (sk->state != TCP_CLOSE) 
3466                 return(-EISCONN);
3467 
              /*
               * NOTE(review): 8 is presumably the size of the sockaddr_in
               * fields actually read here (family, port, address) -- confirm.
               */
3468         if (addr_len < 8) 
3469                 return(-EINVAL);
3470 
              /* A zero sin_family is accepted as well as AF_INET. */
3471         if (usin->sin_family && usin->sin_family != AF_INET) 
3472                 return(-EAFNOSUPPORT);
3473 
3474         /*
3475          *      connect() to INADDR_ANY means loopback (BSD'ism).
3476          */
3477         
              /* Note: this rewrites the caller-supplied address structure. */
3478         if(usin->sin_addr.s_addr==INADDR_ANY)
3479                 usin->sin_addr.s_addr=ip_my_addr();
3480                   
3481         /*
3482          *      Don't want a TCP connection going to a broadcast address 
3483          */
3484 
3485         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3486         { 
3487                 return -ENETUNREACH;
3488         }
3489   
3490         /*
3491          *      Connect back to the same socket: Blows up so disallow it 
3492          */
3493 
3494         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3495                 return -EBUSY;
3496 
              /*
               * The socket is marked in use only while the addressing and
               * sequence fields are filled in, and is released again before
               * the buffer allocation.
               * NOTE(review): presumably because a GFP_KERNEL allocation may
               * sleep -- confirm.
               */
3497         sk->inuse = 1;
3498         sk->daddr = usin->sin_addr.s_addr;
              /* Initial send sequence number is derived from the jiffies clock. */
3499         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3500         sk->window_seq = sk->write_seq;
3501         sk->rcv_ack_seq = sk->write_seq -1;
3502         sk->err = 0;
              /* Stored exactly as supplied; no byte-order conversion is applied here. */
3503         sk->dummy_th.dest = usin->sin_port;
3504         release_sock(sk);
3505 
3506         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3507         if (buff == NULL) 
3508         {
3509                 return(-ENOMEM);
3510         }
3511         sk->inuse = 1;
              /* 24 bytes matches the data offset of 6 32-bit words set in t1->doff below. */
3512         buff->len = 24;
3513         buff->sk = sk;
3514         buff->free = 1;
3515         buff->localroute = sk->localroute;
3516         
3517         t1 = (struct tcphdr *) buff->data;
3518 
3519         /*
3520          *      Put in the IP header and routing stuff. 
3521          */
3522          
              /* rt may be NULL; it is only consulted for the window clamp and MTU below. */
3523         rt=ip_rt_route(sk->daddr, NULL, NULL);
3524         
3525 
3526         /*
3527          *      We need to build the routing stuff from the things saved in skb. 
3528          */
3529 
              /*
               * build_header() returns the number of bytes it placed in front
               * of the TCP header (added to buff->len and t1 below), or a
               * negative value on failure.  dev is passed by address.
               * NOTE(review): dev is dereferenced further down, so it is
               * presumably always filled in on success -- confirm.
               */
3530         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3531                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3532         if (tmp < 0) 
3533         {
3534                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3535                 release_sock(sk);
3536                 return(-ENETUNREACH);
3537         }
3538 
3539         buff->len += tmp;
3540         t1 = (struct tcphdr *)((char *)t1 +tmp);
3541 
              /* Start from the socket's template header, then set the SYN-specific fields. */
3542         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
              /*
               * The SYN takes the current write_seq, which is then advanced by one.
               * NOTE(review): ntohl() is used where htonl() is meant; the two
               * perform the same byte swap.
               */
3543         t1->seq = ntohl(sk->write_seq++);
3544         sk->sent_seq = sk->write_seq;
3545         buff->h.seq = sk->write_seq;
3546         t1->ack = 0;
              /* NOTE(review): assigned without htons() -- confirm this is intended. */
3547         t1->window = 2;
3548         t1->res1=0;
3549         t1->res2=0;
3550         t1->rst = 0;
3551         t1->urg = 0;
3552         t1->psh = 0;
3553         t1->syn = 1;
3554         t1->urg_ptr = 0;
3555         t1->doff = 6;
3556         /* use 512 or whatever user asked for */
3557         
3558         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3559                 sk->window_clamp=rt->rt_window;
3560         else
3561                 sk->window_clamp=0;
3562 
              /*
               * Choice of sk->mtu, in priority order: the user-set MSS, the
               * route's value when RTF_MTU is set, otherwise 576 - HEADER_SIZE
               * if source and destination differ under the mask, else MAX_WINDOW.
               */
3563         if (sk->user_mss)
3564                 sk->mtu = sk->user_mss;
3565         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3566                 sk->mtu = rt->rt_mss;
3567         else 
3568         {
3569 #ifdef CONFIG_INET_SNARL
3570                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3571 #else
3572                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3573 #endif
3574                         sk->mtu = 576 - HEADER_SIZE;
3575                 else
3576                         sk->mtu = MAX_WINDOW;
3577         }
3578         /*
3579          *      but not bigger than device MTU 
3580          */
3581 
3582         if(sk->mtu <32)
3583                 sk->mtu = 32;   /* Sanity limit */
3584                 
3585         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3586         
3587         /*
3588          *      Put in the TCP options to say MTU. 
3589          */
3590 
              /* Option bytes: kind 2, length 4, then sk->mtu high byte first. */
3591         ptr = (unsigned char *)(t1+1);
3592         ptr[0] = 2;
3593         ptr[1] = 4;
3594         ptr[2] = (sk->mtu) >> 8;
3595         ptr[3] = (sk->mtu) & 0xff;
              /* Checksum covers the header plus the 4 option bytes. */
3596         tcp_send_check(t1, sk->saddr, sk->daddr,
3597                   sizeof(struct tcphdr) + 4, sk);
3598 
3599         /*
3600          *      This must go first otherwise a really quick response will get reset. 
3601          */
3602 
3603         sk->state = TCP_SYN_SENT;
3604 /*      sk->rtt = TCP_CONNECT_TIME;*/
3605         sk->rto = TCP_TIMEOUT_INIT;
3606         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
              /*
               * NOTE(review): starting the counter at TCP_RETR2 - TCP_SYN_RETRIES
               * presumably leaves TCP_SYN_RETRIES attempts before the TCP_RETR2
               * limit is reached -- confirm against the retransmit code.
               */
3607         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3608 
3609         sk->prot->queue_xmit(sk, dev, buff, 0);  
3610         tcp_statistics.TcpActiveOpens++;
3611         tcp_statistics.TcpOutSegs++;
3612   
3613         release_sock(sk);
3614         return(0);
3615 }
3616 
3617 
3618 /* This functions checks to see if the tcp header is actually acceptable. */
3619 static int
3620 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3621              struct options *opt, unsigned long saddr, struct device *dev)
3622 {
3623         unsigned long next_seq;
3624 
3625         next_seq = len - 4*th->doff;
3626         if (th->fin)
3627                 next_seq++;
3628         /* if we have a zero window, we can't have any data in the packet.. */
3629         if (next_seq && !sk->window)
3630                 goto ignore_it;
3631         next_seq += th->seq;
3632 
3633         /*
3634          * This isn't quite right.  sk->acked_seq could be more recent
3635          * than sk->window.  This is however close enough.  We will accept
3636          * slightly more packets than we should, but it should not cause
3637          * problems unless someone is trying to forge packets.
3638          */
3639 
3640         /* have we already seen all of this packet? */
3641         if (!after(next_seq+1, sk->acked_seq))
3642                 goto ignore_it;
3643         /* or does it start beyond the window? */
3644         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3645                 goto ignore_it;
3646 
3647         /* ok, at least part of this packet would seem interesting.. */
3648         return 1;
3649 
3650 ignore_it:
3651         if (th->rst)
3652                 return 0;
3653 
3654         /*
3655          *      Send a reset if we get something not ours and we are
3656          *      unsynchronized. Note: We don't do anything to our end. We
3657          *      are just killing the bogus remote connection then we will
3658          *      connect again and it will work (with luck).
3659          */
3660          
3661         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3662                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3663                 return 1;
3664         }
3665 
3666         /* Try to resync things. */
3667         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3668         return 0;
3669 }
3670 
3671 
3672 #ifdef TCP_FASTPATH
3673 /*
3674  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3675  *      Yes if
3676  *      a) The queue is empty
3677  *      b) The last frame on the queue has the acked flag set
3678  */
3679 
3680 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3681 {
3682         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3683         if(skb==NULL || sk->receive_queue.prev->acked)
3684                 return 1;
3685 }
3686 
3687 #endif
3688 
3689 int
3690 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3691         unsigned long daddr, unsigned short len,
3692         unsigned long saddr, int redo, struct inet_protocol * protocol)
3693 {
3694         struct tcphdr *th;
3695         struct sock *sk;
3696 
3697         if (!skb) 
3698         {
3699                 return(0);
3700         }
3701 
3702         if (!dev) 
3703         {
3704                 return(0);
3705         }
3706   
3707         tcp_statistics.TcpInSegs++;
3708   
3709         if(skb->pkt_type!=PACKET_HOST)
3710         {
3711                 kfree_skb(skb,FREE_READ);
3712                 return(0);
3713         }
3714   
3715         th = skb->h.th;
3716 
3717         /*
3718          *      Find the socket.
3719          */
3720 
3721         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3722 
3723         /*
3724          *      If this socket has got a reset its to all intents and purposes 
3725          *      really dead 
3726          */
3727          
3728         if (sk!=NULL && sk->zapped)
3729                 sk=NULL;
3730 
3731         if (!redo) 
3732         {
3733                 if (tcp_check(th, len, saddr, daddr )) 
3734                 {
3735                         skb->sk = NULL;
3736                         kfree_skb(skb,FREE_READ);
3737                         /*
3738                          * We don't release the socket because it was
3739                          * never marked in use.
3740                          */
3741                         return(0);
3742                 }
3743                 th->seq = ntohl(th->seq);
3744 
3745                 /* See if we know about the socket. */
3746                 if (sk == NULL) 
3747                 {
3748                         if (!th->rst)
3749                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3750                         skb->sk = NULL;
3751                         kfree_skb(skb, FREE_READ);
3752                         return(0);
3753                 }
3754 
3755                 skb->len = len;
3756                 skb->sk = sk;
3757                 skb->acked = 0;
3758                 skb->used = 0;
3759                 skb->free = 0;
3760                 skb->saddr = daddr;
3761                 skb->daddr = saddr;
3762         
3763                 /* We may need to add it to the backlog here. */
3764                 cli();
3765                 if (sk->inuse) 
3766                 {
3767                         skb_queue_head(&sk->back_log, skb);
3768                         sti();
3769                         return(0);
3770                 }
3771                 sk->inuse = 1;
3772                 sti();
3773         }
3774         else
3775         {
3776                 if (!sk) 
3777                 {
3778                         return(0);
3779                 }
3780         }
3781 
3782 
3783         if (!sk->prot) 
3784         {
3785                 return(0);
3786         }
3787 
3788 
3789         /*
3790          *      Charge the memory to the socket. 
3791          */
3792          
3793         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3794         {
3795                 skb->sk = NULL;
3796                 kfree_skb(skb, FREE_READ);
3797                 release_sock(sk);
3798                 return(0);
3799         }
3800 
3801         sk->rmem_alloc += skb->mem_len;
3802 
3803 #ifdef TCP_FASTPATH
3804 /*
3805  *      Incoming data stream fastpath. 
3806  *
3807  *      We try to optimise two things.
3808  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3809  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3810  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3811  *
3812  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3813  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3814  *      speed although further optimizing here is possible.
3815  */
3816  
3817         /* Im trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3818         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3819         {       
3820                 /* Packets in order. Fits window */
3821                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3822                 {
3823                         /* Ack is harder */
3824                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3825                         {
3826                                 kfree_skb(skb, FREE_READ);
3827                                 release_sock(sk);
3828                                 return 0;
3829                         }
3830                         /*
3831                          *      Set up variables
3832                          */
3833                         skb->len -= (th->doff *4);
3834                         sk->bytes_rcv += skb->len;
3835                         tcp_rx_hit2++;
3836                         if(skb->len)
3837                         {
3838                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3839                                 if(sk->window >= skb->len)
3840                                         sk->window-=skb->len;                   /* We know its effect on the window */
3841                                 else
3842                                         sk->window=0;
3843                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3844                                 skb->acked=1;                           /* Guaranteed true */
3845                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3846                                         sk->bytes_rcv > sk->max_unacked)
3847                                 {
3848                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3849                                 }
3850                                 else
3851                                 {
3852                                         sk->ack_backlog++;
3853                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3854                                 }
3855                                 if(!sk->dead)
3856                                         sk->data_ready(sk,0);
3857                                 release_sock(sk);
3858                                 return 0;
3859                         }
3860                 }
3861                 /*
3862                  *      More generic case of arriving data stream in ESTABLISHED
3863                  */
3864                 tcp_rx_hit1++;
3865                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3866                 {
3867                         kfree_skb(skb, FREE_READ);
3868                         release_sock(sk);
3869                         return 0;
3870                 }
3871                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3872                 {
3873                         kfree_skb(skb, FREE_READ);
3874                         release_sock(sk);
3875                         return 0;
3876                 }
3877                 if(tcp_data(skb, sk, saddr, len))
3878                         kfree_skb(skb, FREE_READ);
3879                 release_sock(sk);
3880                 return 0;
3881         }
3882         tcp_rx_miss++;
3883 #endif  
3884 
3885         /*
3886          *      Now deal with all cases.
3887          */
3888          
3889         switch(sk->state) 
3890         {
3891         
3892                 /*
3893                  * This should close the system down if it's waiting
3894                  * for an ack that is never going to be sent.
3895                  */
3896                 case TCP_LAST_ACK:
3897                         if (th->rst) 
3898                         {
3899                                 sk->zapped=1;
3900                                 sk->err = ECONNRESET;
3901                                 sk->state = TCP_CLOSE;
3902                                 sk->shutdown = SHUTDOWN_MASK;
3903                                 if (!sk->dead) 
3904                                 {
3905                                         sk->state_change(sk);
3906                                 }
3907                                 kfree_skb(skb, FREE_READ);
3908                                 release_sock(sk);
3909                                 return(0);
3910                         }
3911 
3912                 case TCP_ESTABLISHED:
3913                 case TCP_CLOSE_WAIT:
3914                 case TCP_CLOSING:
3915                 case TCP_FIN_WAIT1:
3916                 case TCP_FIN_WAIT2:
3917                 case TCP_TIME_WAIT:
3918                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3919                         {
3920                                 kfree_skb(skb, FREE_READ);
3921                                 release_sock(sk);
3922                                 return(0);
3923                         }
3924 
3925                         if (th->rst) 
3926                         {
3927                                 tcp_statistics.TcpEstabResets++;
3928                                 tcp_statistics.TcpCurrEstab--;
3929                                 sk->zapped=1;
3930                                 /* This means the thing should really be closed. */
3931                                 sk->err = ECONNRESET;
3932                                 if (sk->state == TCP_CLOSE_WAIT) 
3933                                 {
3934                                         sk->err = EPIPE;
3935                                 }
3936         
3937                                 /*
3938                                  * A reset with a fin just means that
3939                                  * the data was not all read.
3940                                  */
3941                                 sk->state = TCP_CLOSE;
3942                                 sk->shutdown = SHUTDOWN_MASK;
3943                                 if (!sk->dead) 
3944                                 {
3945                                         sk->state_change(sk);
3946                                 }
3947                                 kfree_skb(skb, FREE_READ);
3948                                 release_sock(sk);
3949                                 return(0);
3950                         }
3951                         if (th->syn) 
3952                         {
3953                                 tcp_statistics.TcpCurrEstab--;
3954                                 tcp_statistics.TcpEstabResets++;
3955                                 sk->err = ECONNRESET;
3956                                 sk->state = TCP_CLOSE;
3957                                 sk->shutdown = SHUTDOWN_MASK;
3958                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3959                                 if (!sk->dead) {
3960                                         sk->state_change(sk);
3961                                 }
3962                                 kfree_skb(skb, FREE_READ);
3963                                 release_sock(sk);
3964                                 return(0);
3965                         }
3966         
3967                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
3968                                 kfree_skb(skb, FREE_READ);
3969                                 release_sock(sk);
3970                                 return(0);
3971                         }
3972         
3973                         if (tcp_urg(sk, th, saddr, len)) {
3974                                 kfree_skb(skb, FREE_READ);
3975                                 release_sock(sk);
3976                                 return(0);
3977                         }
3978 
3979         
3980                         if (tcp_data(skb, sk, saddr, len)) {
3981                                 kfree_skb(skb, FREE_READ);
3982                                 release_sock(sk);
3983                                 return(0);
3984                         }       
3985 
3986                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
3987                                 kfree_skb(skb, FREE_READ);
3988                                 release_sock(sk);
3989                                 return(0);
3990                         }
3991         
3992                         release_sock(sk);
3993                         return(0);
3994                 
3995                 case TCP_CLOSE:
3996                         if (sk->dead || sk->daddr) {
3997                                 kfree_skb(skb, FREE_READ);
3998                                         release_sock(sk);
3999                                 return(0);
4000                         }
4001         
4002                         if (!th->rst) {
4003                                 if (!th->ack)
4004                                         th->ack_seq = 0;
4005                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4006                         }
4007                         kfree_skb(skb, FREE_READ);
4008                         release_sock(sk);
4009                                 return(0);
4010         
4011                 case TCP_LISTEN:
4012                         if (th->rst) {
4013                                 kfree_skb(skb, FREE_READ);
4014                                 release_sock(sk);
4015                                 return(0);
4016                         }
4017                         if (th->ack) {
4018                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4019                                 kfree_skb(skb, FREE_READ);
4020                                 release_sock(sk);
4021                                 return(0);
4022                         }
4023         
4024                         if (th->syn) 
4025                         {
4026                                 /*
4027                                  * Now we just put the whole thing including
4028                                  * the header and saddr, and protocol pointer
4029                                  * into the buffer.  We can't respond until the
4030                                  * user tells us to accept the connection.
4031                                  */
4032                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4033                                 release_sock(sk);
4034                                 return(0);
4035                         }
4036 
4037                         kfree_skb(skb, FREE_READ);
4038                         release_sock(sk);
4039                         return(0);
4040 
4041                 case TCP_SYN_RECV:
4042                         if (th->syn) {
4043                                 /* Probably a retransmitted syn */
4044                                 kfree_skb(skb, FREE_READ);
4045                                 release_sock(sk);
4046                                 return(0);
4047                         }
4048         
4049         
4050                 default:
4051                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4052                         {
4053                                 kfree_skb(skb, FREE_READ);
4054                                 release_sock(sk);
4055                                 return(0);
4056                         }
4057         
4058                 case TCP_SYN_SENT:
4059                         if (th->rst) 
4060                         {
4061                                 tcp_statistics.TcpAttemptFails++;
4062                                 sk->err = ECONNREFUSED;
4063                                 sk->state = TCP_CLOSE;
4064                                 sk->shutdown = SHUTDOWN_MASK;
4065                                 sk->zapped = 1;
4066                                 if (!sk->dead) 
4067                                 {
4068                                         sk->state_change(sk);
4069                                 }
4070                                 kfree_skb(skb, FREE_READ);
4071                                 release_sock(sk);
4072                                 return(0);
4073                         }
4074                         if (!th->ack) 
4075                         {
4076                                 if (th->syn) 
4077                                 {
4078                                         sk->state = TCP_SYN_RECV;
4079                                 }
4080                                 kfree_skb(skb, FREE_READ);
4081                                 release_sock(sk);
4082                                 return(0);
4083                         }
4084         
4085                         switch(sk->state) 
4086                         {
4087                                 case TCP_SYN_SENT:
4088                                         if (!tcp_ack(sk, th, saddr, len)) 
4089                                         {
4090                                                 tcp_statistics.TcpAttemptFails++;
4091                                                 tcp_reset(daddr, saddr, th,
4092                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4093                                                 kfree_skb(skb, FREE_READ);
4094                                                         release_sock(sk);
4095                                                 return(0);
4096                                         }
4097         
4098                                         /*
4099                                          * If the syn bit is also set, switch to
4100                                          * tcp_syn_recv, and then to established.
4101                                          */
4102                                         if (!th->syn) 
4103                                         {
4104                                                 kfree_skb(skb, FREE_READ);
4105                                                 release_sock(sk);
4106                                                 return(0);
4107                                         }
4108         
4109                                         /* Ack the syn and fall through. */
4110                                         sk->acked_seq = th->seq+1;
4111                                         sk->fin_seq = th->seq;
4112                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4113                                                 sk, th, sk->daddr);
4114                 
4115                                 case TCP_SYN_RECV:
4116                                         if (!tcp_ack(sk, th, saddr, len)) 
4117                                         {
4118                                                 tcp_statistics.TcpAttemptFails++;
4119                                                 tcp_reset(daddr, saddr, th,
4120                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4121                                                 kfree_skb(skb, FREE_READ);
4122                                                 release_sock(sk);
4123                                                 return(0);
4124                                         }
4125         
4126                                         tcp_statistics.TcpCurrEstab++;
4127                                         sk->state = TCP_ESTABLISHED;
4128         
4129                                         /*
4130                                          *      Now we need to finish filling out
4131                                          *      some of the tcp header.
4132                                          * 
4133                                          *      We need to check for mtu info. 
4134                                          */
4135                                         tcp_options(sk, th);
4136                                         sk->dummy_th.dest = th->source;
4137                                         sk->copied_seq = sk->acked_seq-1;
4138                                         if (!sk->dead) 
4139                                         {
4140                                                 sk->state_change(sk);
4141                                         }
4142         
4143                                         /*
4144                                          * We've already processed his first
4145                                          * ack.  In just about all cases that
4146                                          * will have set max_window.  This is
4147                                          * to protect us against the possibility
4148                                          * that the initial window he sent was 0.
4149                                          * This must occur after tcp_options, which
4150                                          * sets sk->mtu.
4151                                          */
4152                                         if (sk->max_window == 0) 
4153                                         {
4154                                                 sk->max_window = 32;
4155                                                 sk->mss = min(sk->max_window, sk->mtu);
4156                                         }
4157 
4158                                         /*
4159                                          * Now process the rest like we were
4160                                          * already in the established state.
4161                                          */
4162                                         if (th->urg) 
4163                                         {
4164                                                 if (tcp_urg(sk, th, saddr, len)) 
4165                                                 { 
4166                                                         kfree_skb(skb, FREE_READ);
4167                                                         release_sock(sk);
4168                                                         return(0);
4169                                                 }
4170                                         }
4171                                         if (tcp_data(skb, sk, saddr, len))
4172                                                 kfree_skb(skb, FREE_READ);
4173 
4174                                         if (th->fin)
4175                                                 tcp_fin(skb, sk, th, saddr, dev);
4176                                         release_sock(sk);
4177                                         return(0);
4178                         }
4179         
4180                         if (th->urg) 
4181                         {
4182                                 if (tcp_urg(sk, th, saddr, len)) 
4183                                 {
4184                                         kfree_skb(skb, FREE_READ);
4185                                         release_sock(sk);
4186                                         return(0);
4187                                 }
4188                         }
4189                         if (tcp_data(skb, sk, saddr, len)) 
4190                         {
4191                                 kfree_skb(skb, FREE_READ);
4192                                 release_sock(sk);
4193                                 return(0);
4194                         }
4195         
4196                         if (!th->fin) 
4197                         {
4198                                 release_sock(sk);
4199                                 return(0);
4200                         }
4201                         tcp_fin(skb, sk, th, saddr, dev);
4202                         release_sock(sk);
4203                         return(0);
4204         }
4205 }
4206 
4207 
4208 /*
4209  * This routine sends a packet with an out of date sequence
4210  * number. It assumes the other end will try to ack it.
4211  */
4212 
4213 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4214 {
4215         struct sk_buff *buff;
4216         struct tcphdr *t1;
4217         struct device *dev=NULL;
4218         int tmp;
4219 
4220         if (sk->zapped)
4221                 return; /* Afer a valid reset we can send no more */
4222 
4223         /*
4224          * Write data can still be transmitted/retransmitted in the
4225          * following states.  If any other state is encountered, return.
4226          */
4227 
4228         if (sk->state != TCP_ESTABLISHED && 
4229             sk->state != TCP_CLOSE_WAIT &&
4230             sk->state != TCP_FIN_WAIT1 && 
4231             sk->state != TCP_LAST_ACK &&
4232             sk->state != TCP_CLOSING
4233         ) {
4234                 return;
4235         }
4236 
4237         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4238         if (buff == NULL) 
4239                 return;
4240 
4241         buff->len = sizeof(struct tcphdr);
4242         buff->free = 1;
4243         buff->sk = sk;
4244         buff->localroute = sk->localroute;
4245 
4246         t1 = (struct tcphdr *) buff->data;
4247 
4248         /* Put in the IP header and routing stuff. */
4249         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4250                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4251         if (tmp < 0) 
4252         {
4253                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4254                 return;
4255         }
4256 
4257         buff->len += tmp;
4258         t1 = (struct tcphdr *)((char *)t1 +tmp);
4259 
4260         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4261 
4262         /*
4263          * Use a previous sequence.
4264          * This should cause the other end to send an ack.
4265          */
4266         t1->seq = htonl(sk->sent_seq-1);
4267         t1->ack = 1; 
4268         t1->res1= 0;
4269         t1->res2= 0;
4270         t1->rst = 0;
4271         t1->urg = 0;
4272         t1->psh = 0;
4273         t1->fin = 0;
4274         t1->syn = 0;
4275         t1->ack_seq = ntohl(sk->acked_seq);
4276         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
4277         t1->doff = sizeof(*t1)/4;
4278         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4279 
4280          /*     Send it and free it.
4281           *     This will prevent the timer from automatically being restarted.
4282           */
4283         sk->prot->queue_xmit(sk, dev, buff, 1);
4284         tcp_statistics.TcpOutSegs++;
4285 }
4286 
4287 void
4288 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4289 {
4290         if (sk->zapped)
4291                 return;         /* Afer a valid reset we can send no more */
4292 
4293         tcp_write_wakeup(sk);
4294 
4295         sk->backoff++;
4296         sk->rto = min(sk->rto << 1, 120*HZ);
4297         reset_timer (sk, TIME_PROBE0, sk->rto);
4298         sk->retransmits++;
4299         sk->prot->retransmits ++;
4300 }
4301 
4302 /*
4303  *      Socket option code for TCP. 
4304  */
4305   
4306 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4307 {
4308         int val,err;
4309 
4310         if(level!=SOL_TCP)
4311                 return ip_setsockopt(sk,level,optname,optval,optlen);
4312 
4313         if (optval == NULL) 
4314                 return(-EINVAL);
4315 
4316         err=verify_area(VERIFY_READ, optval, sizeof(int));
4317         if(err)
4318                 return err;
4319         
4320         val = get_fs_long((unsigned long *)optval);
4321 
4322         switch(optname)
4323         {
4324                 case TCP_MAXSEG:
4325 /*                      if(val<200||val>2048 || val>sk->mtu) */
4326 /*
4327  * values greater than interface MTU won't take effect.  however at
4328  * the point when this call is done we typically don't yet know
4329  * which interface is going to be used
4330  */
4331                         if(val<1||val>MAX_WINDOW)
4332                                 return -EINVAL;
4333                         sk->user_mss=val;
4334                         return 0;
4335                 case TCP_NODELAY:
4336                         sk->nonagle=(val==0)?0:1;
4337                         return 0;
4338                 default:
4339                         return(-ENOPROTOOPT);
4340         }
4341 }
4342 
4343 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4344 {
4345         int val,err;
4346 
4347         if(level!=SOL_TCP)
4348                 return ip_getsockopt(sk,level,optname,optval,optlen);
4349                         
4350         switch(optname)
4351         {
4352                 case TCP_MAXSEG:
4353                         val=sk->user_mss;
4354                         break;
4355                 case TCP_NODELAY:
4356                         val=sk->nonagle;        /* Until Johannes stuff is in */
4357                         break;
4358                 default:
4359                         return(-ENOPROTOOPT);
4360         }
4361         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4362         if(err)
4363                 return err;
4364         put_fs_long(sizeof(int),(unsigned long *) optlen);
4365 
4366         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4367         if(err)
4368                 return err;
4369         put_fs_long(val,(unsigned long *)optval);
4370 
4371         return(0);
4372 }       
4373 
4374 
4375 struct proto tcp_prot = {
4376         sock_wmalloc,
4377         sock_rmalloc,
4378         sock_wfree,
4379         sock_rfree,
4380         sock_rspace,
4381         sock_wspace,
4382         tcp_close,
4383         tcp_read,
4384         tcp_write,
4385         tcp_sendto,
4386         tcp_recvfrom,
4387         ip_build_header,
4388         tcp_connect,
4389         tcp_accept,
4390         ip_queue_xmit,
4391         tcp_retransmit,
4392         tcp_write_wakeup,
4393         tcp_read_wakeup,
4394         tcp_rcv,
4395         tcp_select,
4396         tcp_ioctl,
4397         NULL,
4398         tcp_shutdown,
4399         tcp_setsockopt,
4400         tcp_getsockopt,
4401         128,
4402         0,
4403         {NULL,},
4404         "TCP"
4405 };

/* [previous][next][first][last][top][bottom][index][help] */