root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_dequeue_established
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. tcp_send_skb
  15. tcp_dequeue_partial
  16. tcp_send_partial
  17. tcp_enqueue_partial
  18. tcp_send_ack
  19. tcp_build_header
  20. tcp_write
  21. tcp_sendto
  22. tcp_read_wakeup
  23. cleanup_rbuf
  24. tcp_read_urg
  25. tcp_read
  26. tcp_shutdown
  27. tcp_recvfrom
  28. tcp_reset
  29. tcp_options
  30. default_mask
  31. tcp_conn_request
  32. tcp_close
  33. tcp_write_xmit
  34. tcp_ack
  35. tcp_data
  36. tcp_check_urg
  37. tcp_urg
  38. tcp_fin
  39. tcp_accept
  40. tcp_connect
  41. tcp_sequence
  42. tcp_clean_end
  43. tcp_rcv
  44. tcp_write_wakeup
  45. tcp_send_probe0
  46. tcp_setsockopt
  47. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed where wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
  83  *              Matt Dillon     :       Yet more small nasties remove from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *              Alan Cox        :       Reset on closedown bug.
  87  *      Peter De Schrijver      :       ENOTCONN check missing in tcp_sendto().
  88  *
  89  *
  90  * To Fix:
  91  *                      Fast path the code. Two things here - fix the window calculation
  92  *              so it doesn't iterate over the queue, also spot packets with no funny
  93  *              options arriving in order and process directly.
  94  *
  95  *              This program is free software; you can redistribute it and/or
  96  *              modify it under the terms of the GNU General Public License
  97  *              as published by the Free Software Foundation; either version
  98  *              2 of the License, or(at your option) any later version.
  99  *
 100  * Description of States:
 101  *
 102  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 103  *
 104  *      TCP_SYN_RECV            received a connection request, sent ack,
 105  *                              waiting for final ack in three-way handshake.
 106  *
 107  *      TCP_ESTABLISHED         connection established
 108  *
 109  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 110  *                              transmission of remaining buffered data
 111  *
 112  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 113  *                              to shutdown
 114  *
 115  *      TCP_CLOSING             both sides have shutdown but we still have
 116  *                              data we have to finish sending
 117  *
 118  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 119  *                              closed, can only be entered from FIN_WAIT2
 120  *                              or CLOSING.  Required because the other end
 121  *                              may not have gotten our last ACK causing it
 122  *                              to retransmit the data packet (which we ignore)
 123  *
 124  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 125  *                              us to finish writing our data and to shutdown
 126  *                              (we have to close() to move on to LAST_ACK)
 127  *
 128  *      TCP_LAST_ACK            our side has shutdown after remote has
 129  *                              shutdown.  There may still be data in our
 130  *                              buffer that we have to finish sending
 131  *              
 132  *      TCP_CLOSE               socket is finished
 133  */
 134 #include <linux/types.h>
 135 #include <linux/sched.h>
 136 #include <linux/mm.h>
 137 #include <linux/string.h>
 138 #include <linux/socket.h>
 139 #include <linux/sockios.h>
 140 #include <linux/termios.h>
 141 #include <linux/in.h>
 142 #include <linux/fcntl.h>
 143 #include <linux/inet.h>
 144 #include <linux/netdevice.h>
 145 #include "snmp.h"
 146 #include "ip.h"
 147 #include "protocol.h"
 148 #include "icmp.h"
 149 #include "tcp.h"
 150 #include <linux/skbuff.h>
 151 #include "sock.h"
 152 #include "route.h"
 153 #include <linux/errno.h>
 154 #include <linux/timer.h>
 155 #include <asm/system.h>
 156 #include <asm/segment.h>
 157 #include <linux/mm.h>
 158 
 159 #undef TCP_FASTPATH
     /* TCP_FASTPATH is forced off above, so the #ifdef TCP_FASTPATH section below is compiled out. */
 160 
 161 #define SEQ_TICK 3
     /* NOTE(review): neither SEQ_TICK nor seq_offset is used in the part of the file visible here - confirm users elsewhere. */
 162 unsigned long seq_offset;
     /*
      * TCP statistics block. In this part of the file tcp_set_state() adjusts
      * TcpCurrEstab, tcp_err() increments TcpAttemptFails and tcp_send_skb()
      * increments TcpOutSegs.
      */
 163 struct tcp_mib  tcp_statistics;
 164 
     /* Counters that only exist in TCP_FASTPATH builds. */
 165 #ifdef TCP_FASTPATH
 166 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 167 #endif
 168 
 169 
 170 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 171 {
 172         if (a < b) 
 173                 return(a);
 174         return(b);
 175 }
 176 
 177 #undef STATE_TRACE
 178 
 179 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 180 {
 181         if(sk->state==TCP_ESTABLISHED)
 182                 tcp_statistics.TcpCurrEstab--;
 183 #ifdef STATE_TRACE
 184         if(sk->debug)
 185                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 186 #endif  
 187         sk->state=state;
 188         if(state==TCP_ESTABLISHED)
 189                 tcp_statistics.TcpCurrEstab++;
 190 }
 191 
 192 /* This routine picks a TCP window for a socket based on
 193    the following constraints
 194    
 195    1. The window can never be shrunk once it is offered (RFC 793)
 196    2. We limit memory per socket
 197    
 198    For now we use NET2E3's heuristic of offering half the memory
 199    we have handy. All is not as bad as this seems however because
 200    of two things. Firstly we will bin packets even within the window
 201    in order to get the data we are waiting for into the memory limit.
 202    Secondly we bin common duplicate forms at receive time
 203    
 204    Better heuristics welcome
 205 */
 206    
 207 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 208 {
 209         int new_window = sk->prot->rspace(sk);
 210         
 211         if(sk->window_clamp)
 212                 new_window=min(sk->window_clamp,new_window);
 213 /*
 214  * two things are going on here.  First, we don't ever offer a
 215  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 216  * receiver side of SWS as specified in RFC1122.
 217  * Second, we always give them at least the window they
 218  * had before, in order to avoid retracting window.  This
 219  * is technically allowed, but RFC1122 advises against it and
 220  * in practice it causes trouble.
 221  */
 222         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 223                 return(sk->window);
 224         return(new_window);
 225 }
 226 
 227 /*
 228  *      Find someone to 'accept'. Must be called with
 229  *      sk->inuse=1 or cli()
 230  */ 
 231 
 232 static struct sk_buff *tcp_find_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 233 {
 234         struct sk_buff *p=skb_peek(&s->receive_queue);
 235         if(p==NULL)
 236                 return NULL;
 237         do
 238         {
 239                 if(p->sk->state>=TCP_ESTABLISHED)
 240                         return p;
 241                 p=p->next;
 242         }
 243         while(p!=skb_peek(&s->receive_queue));
 244         return NULL;
 245 }
 246 
 247 static struct sk_buff *tcp_dequeue_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 248 {
 249         struct sk_buff *skb;
 250         unsigned long flags;
 251         save_flags(flags);
 252         cli(); 
 253         skb=tcp_find_established(s);
 254         if(skb!=NULL)
 255                 skb_unlink(skb);        /* Take it off the queue */
 256         restore_flags(flags);
 257         return skb;
 258 }
 259 
 260 
 261 /*
 262  *      Enter the time wait state. 
 263  */
 264 
 265 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 266 {
 267         tcp_set_state(sk,TCP_TIME_WAIT);
 268         sk->shutdown = SHUTDOWN_MASK;
 269         if (!sk->dead)
 270                 sk->state_change(sk);
 271         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 272 }
 273 
 274 /*
 275  *      A timer event has triggered a tcp retransmit timeout. The
 276  *      socket xmit queue is ready and set up to send. Because
 277  *      the ack receive code keeps the queue straight we do
 278  *      nothing clever here.
 279  */
 280 
 281 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 282 {
 283         if (all) 
 284         {
 285                 ip_retransmit(sk, all);
 286                 return;
 287         }
 288 
 289         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 290         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 291         sk->cong_count = 0;
 292 
 293         sk->cong_window = 1;
 294 
 295         /* Do the actual retransmit. */
 296         ip_retransmit(sk, all);
 297 }
 298 
 299 
 300 /*
 301  * This routine is called by the ICMP module when it gets some
 302  * sort of error condition.  If err < 0 then the socket should
 303  * be closed and the error returned to the user.  If err > 0
 304  * it's just the icmp type << 8 | icmp code.  After adjustment
 305  * header points to the first 8 bytes of the tcp header.  We need
 306  * to find the appropriate port.
 307  */
 308 
 309 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 310         unsigned long saddr, struct inet_protocol *protocol)
 311 {
 312         struct tcphdr *th;
 313         struct sock *sk;
 314         struct iphdr *iph=(struct iphdr *)header;
 315   
 316         header+=4*iph->ihl;
 317    
 318 
 319         th =(struct tcphdr *)header;
 320         sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
 321 
 322         if (sk == NULL) 
 323                 return;
 324   
 325         if(err<0)
 326         {
 327                 sk->err = -err;
 328                 sk->error_report(sk);
 329                 return;
 330         }
 331 
 332         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 333         {
 334                 /*
 335                  * FIXME:
 336                  * For now we will just trigger a linear backoff.
 337                  * The slow start code should cause a real backoff here.
 338                  */
 339                 if (sk->cong_window > 4)
 340                         sk->cong_window--;
 341                 return;
 342         }
 343 
 344 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 345 
 346         /*
 347          * If we've already connected we will keep trying
 348          * until we time out, or the user gives up.
 349          */
 350 
 351         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 352         {
 353                 if (sk->state == TCP_SYN_SENT) 
 354                 {
 355                         tcp_statistics.TcpAttemptFails++;
 356                         tcp_set_state(sk,TCP_CLOSE);
 357                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 358                 }
 359                 sk->err = icmp_err_convert[err & 0xff].errno;           
 360         }
 361         return;
 362 }
 363 
 364 
 365 /*
 366  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 367  *      in the received data queue (ie a frame missing that needs sending to us)
 368  */
 369 
 370 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Count the bytes queued on sk->receive_queue that follow on from
      * sk->copied_seq+1 without a gap.
      *
      * Returns 0 when sk is NULL or the queue is empty, otherwise the
      * count (held as an unsigned long, returned through the int
      * return type).  The queue is inspected with interrupts disabled
      * (cli); the saved flags are restored before returning.
      */
 371 {
 372         unsigned long counted;
 373         unsigned long amount;
 374         struct sk_buff *skb;
 375         int sum;
 376         unsigned long flags;
 377 
 378         if(sk && sk->debug)
 379                 printk("tcp_readable: %p - ",sk);
 380 
 381         save_flags(flags);
 382         cli();
 383         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 384         {
 385                 restore_flags(flags);
 386                 if(sk && sk->debug) 
 387                         printk("empty\n");
 388                 return(0);
 389         }
 390   
 391         counted = sk->copied_seq+1;     /* Where we are at the moment */
 392         amount = 0;
 393   
 394         /* Do until a push or until we are out of data. */
 395         do 
 396         {
 397                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 398                         break;
 399                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
                 /*
                  * A SYN bumps sum by one, so 'counted' advances past it,
                  * but the matching amount-- below keeps it out of the
                  * byte count.
                  */
 400                 if (skb->h.th->syn)
 401                         sum++;
 402                 if (sum > 0) 
 403                 {                                       /* Add it up, move on */
 404                         amount += sum;
 405                         if (skb->h.th->syn) 
 406                                 amount--;
 407                         counted += sum;
 408                 }
                 /* Stop at a PSH segment, but only once something has been counted. */
 409                 if (amount && skb->h.th->psh) break;
 410                 skb = skb->next;
 411         }
         /* The queue head, viewed as an sk_buff, marks the end of the walk. */
 412         while(skb != (struct sk_buff *)&sk->receive_queue);
 413 
         /*
          * When urgent data is not delivered inline and the urgent
          * position lies within the span counted so far, leave that
          * one byte out of the total.
          */
 414         if (amount && !sk->urginline && sk->urg_data &&
 415             (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 416                 amount--;               /* don't count urg data */
 417         restore_flags(flags);
         /* sk cannot be NULL here: the NULL case returned above. */
 418         if(sk->debug)
 419                 printk("got %lu bytes.\n",amount);
 420         return(amount);
 421 }
 422 
 423 
 424 /*
 425  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 426  *      listening socket has a receive queue of sockets to accept.
 427  */
 428 
 429 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 430 {
 431         sk->inuse = 1;
 432 
 433         switch(sel_type) 
 434         {
 435                 case SEL_IN:
 436                         if(sk->debug)
 437                                 printk("select in");
 438                         select_wait(sk->sleep, wait);
 439                         if(sk->debug)
 440                                 printk("-select out");
 441                         if (skb_peek(&sk->receive_queue) != NULL) 
 442                         {
 443                                 if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) 
 444                                 {
 445                                         release_sock(sk);
 446                                         if(sk->debug)
 447                                                 printk("-select ok data\n");
 448                                         return(1);
 449                                 }
 450                         }
 451                         if (sk->err != 0)       /* Receiver error */
 452                         {
 453                                 release_sock(sk);
 454                                 if(sk->debug)
 455                                         printk("-select ok error");
 456                                 return(1);
 457                         }
 458                         if (sk->shutdown & RCV_SHUTDOWN) 
 459                         {
 460                                 release_sock(sk);
 461                                 if(sk->debug)
 462                                         printk("-select ok down\n");
 463                                 return(1);
 464                         } 
 465                         else 
 466                         {
 467                                 release_sock(sk);
 468                                 if(sk->debug)
 469                                         printk("-select fail\n");
 470                                 return(0);
 471                         }
 472                 case SEL_OUT:
 473                         select_wait(sk->sleep, wait);
 474                         if (sk->shutdown & SEND_SHUTDOWN) 
 475                         {
 476                                 /* FIXME: should this return an error? */
 477                                 release_sock(sk);
 478                                 return(0);
 479                         }
 480 
 481                         /*
 482                          * This is now right thanks to a small fix
 483                          * by Matt Dillon.
 484                          */
 485                         
 486                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 487                         {
 488                                 release_sock(sk);
 489                                 /* This should cause connect to work ok. */
 490                                 if (sk->state == TCP_SYN_RECV ||
 491                                     sk->state == TCP_SYN_SENT) return(0);
 492                                 return(1);
 493                         }
 494                         release_sock(sk);
 495                         return(0);
 496                 case SEL_EX:
 497                         select_wait(sk->sleep,wait);
 498                         if (sk->err || sk->urg_data) 
 499                         {
 500                                 release_sock(sk);
 501                                 return(1);
 502                         }
 503                         release_sock(sk);
 504                         return(0);
 505         }
 506 
 507         release_sock(sk);
 508         return(0);
 509 }
 510 
 511 
 512 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 513 {
 514         int err;
 515         switch(cmd) 
 516         {
 517 
 518                 case TIOCINQ:
 519 #ifdef FIXME    /* FIXME: */
 520                 case FIONREAD:
 521 #endif
 522                 {
 523                         unsigned long amount;
 524 
 525                         if (sk->state == TCP_LISTEN) 
 526                                 return(-EINVAL);
 527 
 528                         sk->inuse = 1;
 529                         amount = tcp_readable(sk);
 530                         release_sock(sk);
 531                         err=verify_area(VERIFY_WRITE,(void *)arg,
 532                                                    sizeof(unsigned long));
 533                         if(err)
 534                                 return err;
 535                         put_fs_long(amount,(unsigned long *)arg);
 536                         return(0);
 537                 }
 538                 case SIOCATMARK:
 539                 {
 540                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 541 
 542                         err = verify_area(VERIFY_WRITE,(void *) arg,
 543                                                   sizeof(unsigned long));
 544                         if (err)
 545                                 return err;
 546                         put_fs_long(answ,(int *) arg);
 547                         return(0);
 548                 }
 549                 case TIOCOUTQ:
 550                 {
 551                         unsigned long amount;
 552 
 553                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 554                         amount = sk->prot->wspace(sk);
 555                         err=verify_area(VERIFY_WRITE,(void *)arg,
 556                                                    sizeof(unsigned long));
 557                         if(err)
 558                                 return err;
 559                         put_fs_long(amount,(unsigned long *)arg);
 560                         return(0);
 561                 }
 562                 default:
 563                         return(-EINVAL);
 564         }
 565 }
 566 
 567 
 568 /*
 569  *      This routine computes a TCP checksum. 
 570  */
 571  
 572 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 573           unsigned long saddr, unsigned long daddr)
     /*
      * Compute the TCP checksum over len bytes starting at th, together
      * with the two addresses, the length and the protocol number.
      *
      * th      first byte to sum
      * len     number of bytes to sum
      * saddr   source address; 0 means "use ip_my_addr()"
      * daddr   destination address
      *
      * Returns the one's complement of the folded sum, masked to 16 bits.
      * Written as i386 inline assembly.
      */
 574 {     
 575         unsigned long sum;
 576    
 577         if (saddr == 0) saddr = ip_my_addr();
 578 
 579 /*
 580  * stupid, gcc complains when I use just one __asm__ block,
 581  * something about too many reloads, but this is just two
 582  * instructions longer than what I want
 583  */
         /*
          * First block: sum = daddr + saddr + ((ntohs(len) << 16) +
          * IPPROTO_TCP*256), with the carries added back in.
          */
 584         __asm__("
 585             addl %%ecx, %%ebx
 586             adcl %%edx, %%ebx
 587             adcl $0, %%ebx
 588             "
 589         : "=b"(sum)
 590         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 591         : "bx", "cx", "dx" );
         /*
          * Second block: add the data itself to the running sum.
          *   - 32 bytes per pass (eight lodsl/adcl pairs) while at least
          *     32 bytes remain,
          *   - then the remaining whole 32-bit words (len & 28),
          *   - then a trailing 16-bit word if bit 1 of len is set,
          *   - then a trailing byte if bit 0 of len is set,
          *   - finally the upper 16 bits are added into the lower 16.
          * edx keeps the original len for the tail tests.
          */
 592         __asm__("
 593             movl %%ecx, %%edx
 594             cld
 595             cmpl $32, %%ecx
 596             jb 2f
 597             shrl $5, %%ecx
 598             clc
 599 1:          lodsl
 600             adcl %%eax, %%ebx
 601             lodsl
 602             adcl %%eax, %%ebx
 603             lodsl
 604             adcl %%eax, %%ebx
 605             lodsl
 606             adcl %%eax, %%ebx
 607             lodsl
 608             adcl %%eax, %%ebx
 609             lodsl
 610             adcl %%eax, %%ebx
 611             lodsl
 612             adcl %%eax, %%ebx
 613             lodsl
 614             adcl %%eax, %%ebx
 615             loop 1b
 616             adcl $0, %%ebx
 617             movl %%edx, %%ecx
 618 2:          andl $28, %%ecx
 619             je 4f
 620             shrl $2, %%ecx
 621             clc
 622 3:          lodsl
 623             adcl %%eax, %%ebx
 624             loop 3b
 625             adcl $0, %%ebx
 626 4:          movl $0, %%eax
 627             testw $2, %%dx
 628             je 5f
 629             lodsw
 630             addl %%eax, %%ebx
 631             adcl $0, %%ebx
 632             movw $0, %%ax
 633 5:          test $1, %%edx
 634             je 6f
 635             lodsb
 636             addl %%eax, %%ebx
 637             adcl $0, %%ebx
 638 6:          movl %%ebx, %%eax
 639             shrl $16, %%eax
 640             addw %%ax, %%bx
 641             adcw $0, %%bx
 642             "
 643         : "=b"(sum)
 644         : "0"(sum), "c"(len), "S"(th)
 645         : "ax", "bx", "cx", "dx", "si" );
 646 
 647         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 648   
 649         return((~sum) & 0xffff);
 650 }
 651 
 652 
 653 
 654 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 655                 unsigned long daddr, int len, struct sock *sk)
 656 {
 657         th->check = 0;
 658         th->check = tcp_check(th, len, saddr, daddr);
 659         return;
 660 }
 661 
 662 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 663 {
 664         int size;
 665         struct tcphdr * th = skb->h.th;
 666 
 667         /* length of packet (not counting length of pre-tcp headers) */
 668         size = skb->len - ((unsigned char *) th - skb->data);
 669 
 670         /* sanity check it.. */
 671         if (size < sizeof(struct tcphdr) || size > skb->len) 
 672         {
 673                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 674                         skb, skb->data, th, skb->len);
 675                 kfree_skb(skb, FREE_WRITE);
 676                 return;
 677         }
 678 
 679         /* If we have queued a header size packet.. */
 680         if (size == sizeof(struct tcphdr)) 
 681         {
 682                 /* If its got a syn or fin its notionally included in the size..*/
 683                 if(!th->syn && !th->fin) 
 684                 {
 685                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 686                         kfree_skb(skb,FREE_WRITE);
 687                         return;
 688                 }
 689         }
 690 
 691         tcp_statistics.TcpOutSegs++;  
 692 
 693         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 694         if (after(skb->h.seq, sk->window_seq) ||
 695             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 696              sk->packets_out >= sk->cong_window) 
 697         {
 698                 /* checksum will be supplied by tcp_write_xmit.  So
 699                  * we shouldn't need to set it at all.  I'm being paranoid */
 700                 th->check = 0;
 701                 if (skb->next != NULL) 
 702                 {
 703                         printk("tcp_send_partial: next != NULL\n");
 704                         skb_unlink(skb);
 705                 }
 706                 skb_queue_tail(&sk->write_queue, skb);
 707                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 708                     sk->send_head == NULL &&
 709                     sk->ack_backlog == 0)
 710                         reset_timer(sk, TIME_PROBE0, sk->rto);
 711         } 
 712         else 
 713         {
 714                 th->ack_seq = ntohl(sk->acked_seq);
 715                 th->window = ntohs(tcp_select_window(sk));
 716 
 717                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 718 
 719                 sk->sent_seq = sk->write_seq;
 720                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 721         }
 722 }
 723 
 724 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 725 {
 726         struct sk_buff * skb;
 727         unsigned long flags;
 728 
 729         save_flags(flags);
 730         cli();
 731         skb = sk->partial;
 732         if (skb) {
 733                 sk->partial = NULL;
 734                 del_timer(&sk->partial_timer);
 735         }
 736         restore_flags(flags);
 737         return skb;
 738 }
 739 
 740 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 741 {
 742         struct sk_buff *skb;
 743 
 744         if (sk == NULL)
 745                 return;
 746         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 747                 tcp_send_skb(sk, skb);
 748 }
 749 
 750 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 751 {
 752         struct sk_buff * tmp;
 753         unsigned long flags;
 754 
 755         save_flags(flags);
 756         cli();
 757         tmp = sk->partial;
 758         if (tmp)
 759                 del_timer(&sk->partial_timer);
 760         sk->partial = skb;
 761         init_timer(&sk->partial_timer);
 762         sk->partial_timer.expires = HZ;
 763         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 764         sk->partial_timer.data = (unsigned long) sk;
 765         add_timer(&sk->partial_timer);
 766         restore_flags(flags);
 767         if (tmp)
 768                 tcp_send_skb(sk, tmp);
 769 }
 770 
 771 
 772 /*
 773  *      This routine sends an ack and also updates the window. 
 774  */
 775  
 776 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 777              struct sock *sk,
 778              struct tcphdr *th, unsigned long daddr)
 779 {
 780         struct sk_buff *buff;
 781         struct tcphdr *t1;
 782         struct device *dev = NULL;
 783         int tmp;
 784 
 785         if(sk->zapped)
 786                 return;         /* We have been reset, we may not send again */
 787         /*
 788          * We need to grab some memory, and put together an ack,
 789          * and then put it into the queue to be sent.
 790          */
 791 
 792         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 793         if (buff == NULL) 
 794         {
 795                 /* Force it to send an ack. */
 796                 sk->ack_backlog++;
 797                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 798                 {
 799                         reset_timer(sk, TIME_WRITE, 10);
 800                 }
 801                 return;
 802         }
 803 
 804         buff->len = sizeof(struct tcphdr);
 805         buff->sk = sk;
 806         buff->localroute = sk->localroute;
 807         t1 =(struct tcphdr *) buff->data;
 808 
 809         /* Put in the IP header and routing stuff. */
 810         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 811                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 812         if (tmp < 0) 
 813         {
 814                 buff->free=1;
 815                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 816                 return;
 817         }
 818         buff->len += tmp;
 819         t1 =(struct tcphdr *)((char *)t1 +tmp);
 820 
 821         /* FIXME: */
 822         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 823 
 824         /*
 825          *      Swap the send and the receive. 
 826          */
 827          
 828         t1->dest = th->source;
 829         t1->source = th->dest;
 830         t1->seq = ntohl(sequence);
 831         t1->ack = 1;
 832         sk->window = tcp_select_window(sk);
 833         t1->window = ntohs(sk->window);
 834         t1->res1 = 0;
 835         t1->res2 = 0;
 836         t1->rst = 0;
 837         t1->urg = 0;
 838         t1->syn = 0;
 839         t1->psh = 0;
 840         t1->fin = 0;
 841         if (ack == sk->acked_seq) 
 842         {
 843                 sk->ack_backlog = 0;
 844                 sk->bytes_rcv = 0;
 845                 sk->ack_timed = 0;
 846                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 847                                   && sk->timeout == TIME_WRITE) 
 848                 {
 849                         if(sk->keepopen) {
 850                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 851                         } else {
 852                                 delete_timer(sk);
 853                         }
 854                 }
 855         }
 856         t1->ack_seq = ntohl(ack);
 857         t1->doff = sizeof(*t1)/4;
 858         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 859         if (sk->debug)
 860                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 861         tcp_statistics.TcpOutSegs++;
 862         sk->prot->queue_xmit(sk, dev, buff, 1);
 863 }
 864 
 865 
 866 /* 
 867  *      This routine builds a generic TCP header. 
 868  */
 869  
 870 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 871 {
 872 
 873         /* FIXME: want to get rid of this. */
 874         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 875         th->seq = htonl(sk->write_seq);
 876         th->psh =(push == 0) ? 1 : 0;
 877         th->doff = sizeof(*th)/4;
 878         th->ack = 1;
 879         th->fin = 0;
 880         sk->ack_backlog = 0;
 881         sk->bytes_rcv = 0;
 882         sk->ack_timed = 0;
 883         th->ack_seq = htonl(sk->acked_seq);
 884         sk->window = tcp_select_window(sk);
 885         th->window = htons(sk->window);
 886 
 887         return(sizeof(*th));
 888 }
 889 
 890 /*
 891  *      This routine copies from a user buffer into a socket,
 892  *      and starts the transmit system.
      *
      *      sk       - socket to write on
      *      from     - source buffer; read with memcpy_fromfs
      *      len      - number of bytes to send
      *      nonblock - if non-zero, return -EAGAIN rather than sleeping
      *      flags    - MSG_OOB and MSG_DONTROUTE are examined here
      *
      *      Returns the number of bytes accepted if any were; otherwise
      *      a negative error (-sk->err, -EPIPE, -EAGAIN, -ERESTARTSYS or
      *      the negative value returned by build_header).  A len <= 0
      *      returns 0.
 893  */
 894 
 895 static int tcp_write(struct sock *sk, unsigned char *from,
    /* [previous][next][first][last][top][bottom][index][help] */
 896           int len, int nonblock, unsigned flags)
 897 {
 898         int copied = 0;
 899         int copy;
 900         int tmp;
 901         struct sk_buff *skb;
 902         struct sk_buff *send_tmp;
 903         unsigned char *buff;
 904         struct proto *prot;
 905         struct device *dev = NULL;
 906 
             /*
              * sk->inuse is set here and again after every release_sock()
              * that is followed by further work on the socket.
              */
 907         sk->inuse=1;
 908         prot = sk->prot;
 909         while(len > 0) 
 910         {
 911                 if (sk->err) 
 912                 {                       /* Stop on an error */
 913                         release_sock(sk);
 914                         if (copied) 
 915                                 return(copied);
 916                         tmp = -sk->err;
 917                         sk->err = 0;
 918                         return(tmp);
 919                 }
 920 
 921         /*
 922          *      First thing we do is make sure that we are established. 
 923          */
 924         
                     /*
                      * After SEND_SHUTDOWN: sk->err is left as EPIPE when a
                      * partial count is returned, and cleared again when
                      * -EPIPE itself is returned.
                      */
 925                 if (sk->shutdown & SEND_SHUTDOWN) 
 926                 {
 927                         release_sock(sk);
 928                         sk->err = EPIPE;
 929                         if (copied) 
 930                                 return(copied);
 931                         sk->err = 0;
 932                         return(-EPIPE);
 933                 }
 934 
 935 
 936         /* 
 937          *      Wait for a connection to finish.
 938          */
 939         
 940                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 941                 {
 942                         if (sk->err) 
 943                         {
 944                                 release_sock(sk);
 945                                 if (copied) 
 946                                         return(copied);
 947                                 tmp = -sk->err;
 948                                 sk->err = 0;
 949                                 return(tmp);
 950                         }
 951 
                             /*
                              * Any state other than SYN_SENT / SYN_RECV cannot
                              * become writable by waiting: fail with -EPIPE
                              * (SIGPIPE is raised only when sk->keepopen is set).
                              */
 952                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 953                         {
 954                                 release_sock(sk);
 955                                 if (copied) 
 956                                         return(copied);
 957 
 958                                 if (sk->err) 
 959                                 {
 960                                         tmp = -sk->err;
 961                                         sk->err = 0;
 962                                         return(tmp);
 963                                 }
 964 
 965                                 if (sk->keepopen) 
 966                                 {
 967                                         send_sig(SIGPIPE, current, 0);
 968                                 }
 969                                 return(-EPIPE);
 970                         }
 971 
 972                         if (nonblock || copied) 
 973                         {
 974                                 release_sock(sk);
 975                                 if (copied) 
 976                                         return(copied);
 977                                 return(-EAGAIN);
 978                         }
 979 
                             /*
                              * The socket is released before sleeping.  The state
                              * is tested again after cli() so the sleep is only
                              * entered if the connection is still not ready and
                              * no error has been posted.
                              */
 980                         release_sock(sk);
 981                         cli();
 982                 
 983                         if (sk->state != TCP_ESTABLISHED &&
 984                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 985                         {
 986                                 interruptible_sleep_on(sk->sleep);
 987                                 if (current->signal & ~current->blocked) 
 988                                 {
 989                                         sti();
 990                                         if (copied) 
 991                                                 return(copied);
 992                                         return(-ERESTARTSYS);
 993                                 }
 994                         }
 995                         sk->inuse = 1;
 996                         sti();
 997                 }
 998 
 999         /*
1000          * The following code can result in copy <= 0 if sk->mss is ever
1001          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1002          * sk->mtu is constant once SYN processing is finished.  I.e. we
1003          * had better not get here until we've seen his SYN and at least one
1004          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1005          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1006          * non-decreasing.  Note that any ioctl to set user_mss must be done
1007          * before the exchange of SYN's.  If the initial ack from the other
1008          * end has a window of 0, max_window and thus mss will both be 0.
1009          */
1010 
1011         /* 
1012          *      Now we need to check if we have a half built packet. 
1013          */
1014 
1015                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1016                 {
1017                         int hdrlen;
1018 
1019                          /* IP header + TCP header */
                              /*
                               * i.e. the offset of the TCP header within skb->data
                               * plus the fixed TCP header size; skb->len - hdrlen
                               * is the payload already in the buffer.
                               */
1020                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1021                                  + sizeof(struct tcphdr);
1022         
1023                         /* Add more stuff to the end of skb->len */
1024                         if (!(flags & MSG_OOB)) 
1025                         {
1026                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1027                                 /* FIXME: this is really a bug. */
1028                                 if (copy <= 0) 
1029                                 {
1030                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1031                                         copy = 0;
1032                                 }
1033           
1034                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1035                                 skb->len += copy;
1036                                 from += copy;
1037                                 copied += copy;
1038                                 len -= copy;
1039                                 sk->write_seq += copy;
1040                         }
                             /*
                              * Send now if the segment is full, if this is an OOB
                              * write, or if nothing is outstanding
                              * (packets_out == 0); otherwise park it again as the
                              * partial packet.
                              */
1041                         if ((skb->len - hdrlen) >= sk->mss ||
1042                                 (flags & MSG_OOB) || !sk->packets_out)
1043                                 tcp_send_skb(sk, skb);
1044                         else
1045                                 tcp_enqueue_partial(skb, sk);
1046                         continue;
1047                 }
1048 
1049         /*
1050          * We also need to worry about the window.
1051          * If window < 1/2 the maximum window we've seen from this
1052          *   host, don't use it.  This is sender side
1053          *   silly window prevention, as specified in RFC1122.
1054          *   (Note that this is different than earlier versions of
1055          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1056          *   use the whole MSS.  Since this results in the right
1057          *   edge of the packet being outside the window, it will
1058          *   be queued for later rather than sent.
1059          */
1060 
1061                 copy = sk->window_seq - sk->write_seq;
1062                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1063                         copy = sk->mss;
1064                 if (copy > len)
1065                         copy = len;
1066 
1067         /*
1068          *      We should really check the window here also. 
1069          */
1070          
                     /*
                      * send_tmp is non-NULL only for a short (< mss), non-OOB
                      * segment.  Such a segment is allocated at full size so
                      * that later writes can append to it via the partial
                      * packet path above.
                      */
1071                 send_tmp = NULL;
1072                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1073                 {
1074                         /*
1075                          *      We will release the socket in case we sleep here. 
1076                          */
1077                         release_sock(sk);
1078                         /*
1079                          *      NB: following must be mtu, because mss can be increased.
1080                          *      mss is always <= mtu 
1081                          */
1082                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1083                         sk->inuse = 1;
1084                         send_tmp = skb;
1085                 } 
1086                 else 
1087                 {
1088                         /*
1089                          *      We will release the socket in case we sleep here. 
1090                          */
1091                         release_sock(sk);
1092                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1093                         sk->inuse = 1;
1094                 }
1095 
1096                 /*
1097                  *      If we didn't get any memory, we need to sleep. 
1098                  */
1099 
1100                 if (skb == NULL) 
1101                 {
1102                         if (nonblock) 
1103                         {
1104                                 release_sock(sk);
1105                                 if (copied) 
1106                                         return(copied);
1107                                 return(-EAGAIN);
1108                         }
1109 
1110                         /*
1111                          *      FIXME: here is another race condition. 
1112                          */
1113 
                             /*
                              * sk->wmem_alloc is sampled before the socket is
                              * released; the sleep is skipped if it has dropped
                              * by the time it is compared again after cli().
                              */
1114                         tmp = sk->wmem_alloc;
1115                         release_sock(sk);
1116                         cli();
1117                         /*
1118                          *      Again we will try to avoid it. 
1119                          */
1120                         if (tmp <= sk->wmem_alloc &&
1121                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1122                                 && sk->err == 0) 
1123                         {
1124                                 interruptible_sleep_on(sk->sleep);
1125                                 if (current->signal & ~current->blocked) 
1126                                 {
1127                                         sti();
1128                                         if (copied) 
1129                                                 return(copied);
1130                                         return(-ERESTARTSYS);
1131                                 }
1132                         }
1133                         sk->inuse = 1;
1134                         sti();
1135                         continue;
1136                 }
1137 
1138                 skb->len = 0;
1139                 skb->sk = sk;
1140                 skb->free = 0;
1141                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1142         
1143                 buff = skb->data;
1144         
1145                 /*
1146                  * FIXME: we need to optimize this.
1147                  * Perhaps some hints here would be good.
1148                  */
1149                 
1150                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1151                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1152                 if (tmp < 0 ) 
1153                 {
1154                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1155                         release_sock(sk);
1156                         if (copied) 
1157                                 return(copied);
1158                         return(tmp);
1159                 }
1160                 skb->len += tmp;
1161                 skb->dev = dev;
1162                 buff += tmp;
1163                 skb->h.th =(struct tcphdr *) buff;
                     /*
                      * The third argument is the number of bytes that will
                      * still be unsent after this segment; tcp_build_header
                      * sets PSH when it is zero.
                      */
1164                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
                     /*
                      * tcp_build_header as written in this file always returns
                      * sizeof(struct tcphdr), so this branch is defensive.
                      */
1165                 if (tmp < 0) 
1166                 {
1167                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1168                         release_sock(sk);
1169                         if (copied) 
1170                                 return(copied);
1171                         return(tmp);
1172                 }
1173 
1174                 if (flags & MSG_OOB) 
1175                 {
1176                         ((struct tcphdr *)buff)->urg = 1;
                             /* NOTE(review): host-to-network conversion spelled ntohs; htons is the conventional name. */
1177                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1178                 }
1179                 skb->len += tmp;
1180                 memcpy_fromfs(buff+tmp, from, copy);
1181 
1182                 from += copy;
1183                 copied += copy;
1184                 len -= copy;
1185                 skb->len += copy;
1186                 skb->free = 0;
1187                 sk->write_seq += copy;
1188         
                     /*
                      * A short segment is held back as the partial packet while
                      * other packets are outstanding; otherwise it is sent
                      * immediately.
                      */
1189                 if (send_tmp != NULL && sk->packets_out) 
1190                 {
1191                         tcp_enqueue_partial(send_tmp, sk);
1192                         continue;
1193                 }
1194                 tcp_send_skb(sk, skb);
1195         }
1196         sk->err = 0;
1197 
1198 /*
1199  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1200  *      interactive fast network servers. It's meant to be on and
1201  *      it really improves the throughput though not the echo time
1202  *      on my slow slip link - Alan
1203  */
1204 
1205 /*
1206  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1207  */
1208  
             /*
              * Flush a parked partial packet if nothing is outstanding, or if
              * sk->nonagle is set and write_seq is still before window_seq.
              */
1209         if(sk->partial && ((!sk->packets_out) 
1210      /* If not nagling we can send on the before case too.. */
1211               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1212         ))
1213                 tcp_send_partial(sk);
1214 
1215         release_sock(sk);
1216         return(copied);
1217 }
1218 
1219 
1220 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1221            int len, int nonblock, unsigned flags,
1222            struct sockaddr_in *addr, int addr_len)
1223 {
1224         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1225                 return -EINVAL;
1226         if (!tcp_connected(sk->state))
1227                 return -ENOTCONN;
1228         if (addr_len < sizeof(*addr))
1229                 return -EINVAL;
1230         if (addr->sin_family && addr->sin_family != AF_INET) 
1231                 return -EINVAL;
1232         if (addr->sin_port != sk->dummy_th.dest) 
1233                 return -EISCONN;
1234         if (addr->sin_addr.s_addr != sk->daddr) 
1235                 return -EISCONN;
1236         return tcp_write(sk, from, len, nonblock, flags);
1237 }
1238 
1239 
1240 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1241 {
1242         int tmp;
1243         struct device *dev = NULL;
1244         struct tcphdr *t1;
1245         struct sk_buff *buff;
1246 
1247         if (!sk->ack_backlog) 
1248                 return;
1249 
1250         /*
1251          * FIXME: we need to put code here to prevent this routine from
1252          * being called.  Being called once in a while is ok, so only check
1253          * if this is the second time in a row.
1254          */
1255 
1256         /*
1257          * We need to grab some memory, and put together an ack,
1258          * and then put it into the queue to be sent.
1259          */
1260 
1261         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1262         if (buff == NULL) 
1263         {
1264                 /* Try again real soon. */
1265                 reset_timer(sk, TIME_WRITE, 10);
1266                 return;
1267         }
1268 
1269         buff->len = sizeof(struct tcphdr);
1270         buff->sk = sk;
1271         buff->localroute = sk->localroute;
1272         
1273         /*
1274          *      Put in the IP header and routing stuff. 
1275          */
1276 
1277         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1278                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1279         if (tmp < 0) 
1280         {
1281                 buff->free=1;
1282                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1283                 return;
1284         }
1285 
1286         buff->len += tmp;
1287         t1 =(struct tcphdr *)(buff->data +tmp);
1288 
1289         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1290         t1->seq = htonl(sk->sent_seq);
1291         t1->ack = 1;
1292         t1->res1 = 0;
1293         t1->res2 = 0;
1294         t1->rst = 0;
1295         t1->urg = 0;
1296         t1->syn = 0;
1297         t1->psh = 0;
1298         sk->ack_backlog = 0;
1299         sk->bytes_rcv = 0;
1300         sk->window = tcp_select_window(sk);
1301         t1->window = ntohs(sk->window);
1302         t1->ack_seq = ntohl(sk->acked_seq);
1303         t1->doff = sizeof(*t1)/4;
1304         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1305         sk->prot->queue_xmit(sk, dev, buff, 1);
1306         tcp_statistics.TcpOutSegs++;
1307 }
1308 
1309 
1310 /*
1311  *      FIXME:
1312  *      This routine frees used buffers.
1313  *      It should consider sending an ACK to let the
1314  *      other end know we now have a bigger window.
1315  */
1316 
1317 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1318 {
1319         unsigned long flags;
1320         unsigned long left;
1321         struct sk_buff *skb;
1322         unsigned long rspace;
1323 
1324         if(sk->debug)
1325                 printk("cleaning rbuf for sk=%p\n", sk);
1326   
1327         save_flags(flags);
1328         cli();
1329   
1330         left = sk->prot->rspace(sk);
1331  
1332         /*
1333          * We have to loop through all the buffer headers,
1334          * and try to free up all the space we can.
1335          */
1336 
1337         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1338         {
1339                 if (!skb->used) 
1340                         break;
1341                 skb_unlink(skb);
1342                 skb->sk = sk;
1343                 kfree_skb(skb, FREE_READ);
1344         }
1345 
1346         restore_flags(flags);
1347 
1348         /*
1349          * FIXME:
1350          * At this point we should send an ack if the difference
1351          * in the window, and the amount of space is bigger than
1352          * TCP_WINDOW_DIFF.
1353          */
1354 
1355         if(sk->debug)
1356                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1357                                             left);
1358         if ((rspace=sk->prot->rspace(sk)) != left) 
1359         {
1360                 /*
1361                  * This area has caused the most trouble.  The current strategy
1362                  * is to simply do nothing if the other end has room to send at
1363                  * least 3 full packets, because the ack from those will auto-
1364                  * matically update the window.  If the other end doesn't think
1365                  * we have much space left, but we have room for at least 1 more
1366                  * complete packet than it thinks we do, we will send an ack
1367                  * immediately.  Otherwise we will wait up to .5 seconds in case
1368                  * the user reads some more.
1369                  */
1370                 sk->ack_backlog++;
1371         /*
1372          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1373          * if the other end is offering a window smaller than the agreed on MSS
1374          * (called sk->mtu here).  In theory there's no connection between send
1375          * and receive, and so no reason to think that they're going to send
1376          * small packets.  For the moment I'm using the hack of reducing the mss
1377          * only on the send side, so I'm putting mtu here.
1378          */
1379 
1380                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1381                 {
1382                         /* Send an ack right now. */
1383                         tcp_read_wakeup(sk);
1384                 } 
1385                 else 
1386                 {
1387                         /* Force it to send an ack soon. */
1388                         int was_active = del_timer(&sk->timer);
1389                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1390                         {
1391                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1392                         } 
1393                         else
1394                                 add_timer(&sk->timer);
1395                 }
1396         }
1397 } 
1398 
1399 
1400 /*
1401  *      Handle reading urgent data. 
      *
      *      sk       - socket to read from
      *      nonblock - if non-zero, return -EAGAIN rather than waiting
      *      to       - destination for the urgent byte (written with put_fs_byte)
      *      len      - requested length; at most one byte is ever returned
      *      flags    - MSG_PEEK leaves the urgent byte unconsumed
      *
      *      Returns 1 when an urgent byte was stored, 0 on end of data
      *      (first read after close, RCV_SHUTDOWN, or len <= 0), or a
      *      negative error: -EINVAL when urgent data is inline, absent or
      *      already read; -sk->err; -ENOTCONN; -EAGAIN; -ERESTARTSYS.
1402  */
1403  
1404 static int tcp_read_urg(struct sock * sk, int nonblock,
    /* [previous][next][first][last][top][bottom][index][help] */
1405              unsigned char *to, int len, unsigned flags)
1406 {
1407         struct wait_queue wait = { current, NULL };
1408 
1409         while (len > 0) 
1410         {
1411                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1412                         return -EINVAL;
1413                 if (sk->urg_data & URG_VALID) 
1414                 {
                             /*
                              * Assigning urg_data to a char keeps only its low
                              * byte; presumably the URG_* flag bits sit above it.
                              */
1415                         char c = sk->urg_data;
                             /* Unless peeking, mark the urgent byte as consumed. */
1416                         if (!(flags & MSG_PEEK))
1417                                 sk->urg_data = URG_READ;
1418                         put_fs_byte(c, to);
1419                         return 1;
1420                 }
1421 
1422                 if (sk->err) 
1423                 {
1424                         int tmp = -sk->err;
1425                         sk->err = 0;
1426                         return tmp;
1427                 }
1428 
                     /*
                      * On a closed socket the first call reports end of data
                      * (0) and sets sk->done; later calls get -ENOTCONN.
                      */
1429                 if (sk->state == TCP_CLOSE || sk->done) 
1430                 {
1431                         if (!sk->done) {
1432                                 sk->done = 1;
1433                                 return 0;
1434                         }
1435                         return -ENOTCONN;
1436                 }
1437 
1438                 if (sk->shutdown & RCV_SHUTDOWN) 
1439                 {
1440                         sk->done = 1;
1441                         return 0;
1442                 }
1443 
1444                 if (nonblock)
1445                         return -EAGAIN;
1446 
1447                 if (current->signal & ~current->blocked)
1448                         return -ERESTARTSYS;
1449 
                     /*
                      * The task state is set and the wait queue entry added
                      * before the conditions are tested again; schedule() is
                      * only called if the urgent byte is still pending
                      * (URG_NOTYET), no error is posted and receive is not
                      * shut down.  The loop then re-evaluates everything.
                      */
1450                 current->state = TASK_INTERRUPTIBLE;
1451                 add_wait_queue(sk->sleep, &wait);
1452                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1453                     !(sk->shutdown & RCV_SHUTDOWN))
1454                         schedule();
1455                 remove_wait_queue(sk->sleep, &wait);
1456                 current->state = TASK_RUNNING;
1457         }
1458         return 0;
1459 }
1460 
1461 
1462 /*
1463  *      This routine copies from a sock struct into the user buffer. 
1464  */
1465  
1466 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1467         int len, int nonblock, unsigned flags)
1468 {
1469         struct wait_queue wait = { current, NULL };
1470         int copied = 0;
1471         unsigned long peek_seq;
1472         unsigned long *seq;
1473         unsigned long used;
1474 
1475         /* This error should be checked. */
1476         if (sk->state == TCP_LISTEN)
1477                 return -ENOTCONN;
1478 
1479         /* Urgent data needs to be handled specially. */
1480         if (flags & MSG_OOB)
1481                 return tcp_read_urg(sk, nonblock, to, len, flags);
1482 
1483         peek_seq = sk->copied_seq;
1484         seq = &sk->copied_seq;
1485         if (flags & MSG_PEEK)
1486                 seq = &peek_seq;
1487 
1488         add_wait_queue(sk->sleep, &wait);
1489         sk->inuse = 1;
1490         while (len > 0) 
1491         {
1492                 struct sk_buff * skb;
1493                 unsigned long offset;
1494         
1495                 /*
1496                  * are we at urgent data? Stop if we have read anything.
1497                  */
1498                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1499                         break;
1500 
1501                 current->state = TASK_INTERRUPTIBLE;
1502 
1503                 skb = skb_peek(&sk->receive_queue);
1504                 do 
1505                 {
1506                         if (!skb)
1507                                 break;
1508                         if (before(1+*seq, skb->h.th->seq))
1509                                 break;
1510                         offset = 1 + *seq - skb->h.th->seq;
1511                         if (skb->h.th->syn)
1512                                 offset--;
1513                         if (offset < skb->len)
1514                                 goto found_ok_skb;
1515                         if (!(flags & MSG_PEEK))
1516                                 skb->used = 1;
1517                         skb = skb->next;
1518                 }
1519                 while (skb != (struct sk_buff *)&sk->receive_queue);
1520 
1521                 if (copied)
1522                         break;
1523 
1524                 if (sk->err) 
1525                 {
1526                         copied = -sk->err;
1527                         sk->err = 0;
1528                         break;
1529                 }
1530 
1531                 if (sk->state == TCP_CLOSE) 
1532                 {
1533                         if (!sk->done) 
1534                         {
1535                                 sk->done = 1;
1536                                 break;
1537                         }
1538                         copied = -ENOTCONN;
1539                         break;
1540                 }
1541 
1542                 if (sk->shutdown & RCV_SHUTDOWN) 
1543                 {
1544                         sk->done = 1;
1545                         break;
1546                 }
1547                         
1548                 if (nonblock) 
1549                 {
1550                         copied = -EAGAIN;
1551                         break;
1552                 }
1553 
1554                 cleanup_rbuf(sk);
1555                 release_sock(sk);
1556                 schedule();
1557                 sk->inuse = 1;
1558 
1559                 if (current->signal & ~current->blocked) 
1560                 {
1561                         copied = -ERESTARTSYS;
1562                         break;
1563                 }
1564                 continue;
1565 
1566         found_ok_skb:
1567                 /* Ok so how much can we use ? */
1568                 used = skb->len - offset;
1569                 if (len < used)
1570                         used = len;
1571                 /* do we have urgent data here? */
1572                 if (sk->urg_data) 
1573                 {
1574                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1575                         if (urg_offset < used) 
1576                         {
1577                                 if (!urg_offset) 
1578                                 {
1579                                         if (!sk->urginline) 
1580                                         {
1581                                                 ++*seq;
1582                                                 offset++;
1583                                                 used--;
1584                                         }
1585                                 }
1586                                 else
1587                                         used = urg_offset;
1588                         }
1589                 }
1590                 /* Copy it */
1591                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1592                         skb->h.th->doff*4 + offset, used);
1593                 copied += used;
1594                 len -= used;
1595                 to += used;
1596                 *seq += used;
1597                 if (after(sk->copied_seq+1,sk->urg_seq))
1598                         sk->urg_data = 0;
1599                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1600                         skb->used = 1;
1601         }
1602         remove_wait_queue(sk->sleep, &wait);
1603         current->state = TASK_RUNNING;
1604 
1605         /* Clean up data we have read: This will do ACK frames */
1606         cleanup_rbuf(sk);
1607         release_sock(sk);
1608         return copied;
1609 }
1610 
1611  
1612 /*
1613  *      Shutdown the sending side of a connection.
1614  */
1615 
1616 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1617 {
1618         struct sk_buff *buff;
1619         struct tcphdr *t1, *th;
1620         struct proto *prot;
1621         int tmp;
1622         struct device *dev = NULL;
1623 
1624         /*
1625          * We need to grab some memory, and put together a FIN,
1626          * and then put it into the queue to be sent.
1627          * FIXME:
1628          *
1629          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1630          *      Most of this is guesswork, so maybe it will work...
1631          */
1632 
1633         if (!(how & SEND_SHUTDOWN)) 
1634                 return;
1635          
1636         /*
1637          *      If we've already sent a FIN, return. 
1638          */
1639          
1640         if (sk->state == TCP_FIN_WAIT1 ||
1641             sk->state == TCP_FIN_WAIT2 ||
1642             sk->state == TCP_CLOSING ||
1643             sk->state == TCP_LAST_ACK ||
1644             sk->state == TCP_TIME_WAIT
1645         ) 
1646         {
1647                 return;
1648         }
1649         sk->inuse = 1;
1650 
1651         /*
1652          * flag that the sender has shutdown
1653          */
1654 
1655         sk->shutdown |= SEND_SHUTDOWN;
1656 
1657         /*
1658          *  Clear out any half completed packets. 
1659          */
1660 
1661         if (sk->partial)
1662                 tcp_send_partial(sk);
1663 
1664         prot =(struct proto *)sk->prot;
1665         th =(struct tcphdr *)&sk->dummy_th;
1666         release_sock(sk); /* incase the malloc sleeps. */
1667         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1668         if (buff == NULL)
1669                 return;
1670         sk->inuse = 1;
1671 
1672         buff->sk = sk;
1673         buff->len = sizeof(*t1);
1674         buff->localroute = sk->localroute;
1675         t1 =(struct tcphdr *) buff->data;
1676 
1677         /*
1678          *      Put in the IP header and routing stuff. 
1679          */
1680 
1681         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1682                            IPPROTO_TCP, sk->opt,
1683                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1684         if (tmp < 0) 
1685         {
1686                 /*
1687                  *      Finish anyway, treat this as a send that got lost. 
1688                  *
1689                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1690                  *      written data to be completely acknowledged along
1691                  *      with an acknowledge to our FIN.
1692                  *
1693                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1694                  *      connection established.
1695                  */
1696                 buff->free=1;
1697                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1698 
1699                 if (sk->state == TCP_ESTABLISHED)
1700                         tcp_set_state(sk,TCP_FIN_WAIT1);
1701                 else if(sk->state == TCP_CLOSE_WAIT)
1702                         tcp_set_state(sk,TCP_LAST_ACK);
1703                 else
1704                         tcp_set_state(sk,TCP_FIN_WAIT2);
1705 
1706                 release_sock(sk);
1707                 return;
1708         }
1709 
1710         t1 =(struct tcphdr *)((char *)t1 +tmp);
1711         buff->len += tmp;
1712         buff->dev = dev;
1713         memcpy(t1, th, sizeof(*t1));
1714         t1->seq = ntohl(sk->write_seq);
1715         sk->write_seq++;
1716         buff->h.seq = sk->write_seq;
1717         t1->ack = 1;
1718         t1->ack_seq = ntohl(sk->acked_seq);
1719         t1->window = ntohs(sk->window=tcp_select_window(sk));
1720         t1->fin = 1;
1721         t1->rst = 0;
1722         t1->doff = sizeof(*t1)/4;
1723         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1724 
1725         /*
1726          * If there is data in the write queue, the fin must be appended to
1727          * the write queue.
1728          */
1729         
1730         if (skb_peek(&sk->write_queue) != NULL) 
1731         {
1732                 buff->free=0;
1733                 if (buff->next != NULL) 
1734                 {
1735                         printk("tcp_shutdown: next != NULL\n");
1736                         skb_unlink(buff);
1737                 }
1738                 skb_queue_tail(&sk->write_queue, buff);
1739         } 
1740         else 
1741         {
1742                 sk->sent_seq = sk->write_seq;
1743                 sk->prot->queue_xmit(sk, dev, buff, 0);
1744         }
1745 
1746         if (sk->state == TCP_ESTABLISHED) 
1747                 tcp_set_state(sk,TCP_FIN_WAIT1);
1748         else if (sk->state == TCP_CLOSE_WAIT)
1749                 tcp_set_state(sk,TCP_LAST_ACK);
1750         else
1751                 tcp_set_state(sk,TCP_FIN_WAIT2);
1752 
1753         release_sock(sk);
1754 }
1755 
1756 
1757 static int
1758 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1759              int to_len, int nonblock, unsigned flags,
1760              struct sockaddr_in *addr, int *addr_len)
1761 {
1762         int result;
1763   
1764         /* 
1765          *      Have to check these first unlike the old code. If 
1766          *      we check them after we lose data on an error
1767          *      which is wrong 
1768          */
1769 
1770         if(addr_len)
1771                 *addr_len = sizeof(*addr);
1772         result=tcp_read(sk, to, to_len, nonblock, flags);
1773 
1774         if (result < 0) 
1775                 return(result);
1776   
1777         if(addr)
1778         {
1779                 addr->sin_family = AF_INET;
1780                 addr->sin_port = sk->dummy_th.dest;
1781                 addr->sin_addr.s_addr = sk->daddr;
1782         }
1783         return(result);
1784 }
1785 
1786 
1787 /*
1788  *      This routine will send an RST to the other tcp. 
1789  */
1790  
1791 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1792           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1793 {
1794         struct sk_buff *buff;
1795         struct tcphdr *t1;
1796         int tmp;
1797         struct device *ndev=NULL;
1798   
1799 /*
1800  * We need to grab some memory, and put together an RST,
1801  * and then put it into the queue to be sent.
1802  */
1803 
1804         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1805         if (buff == NULL) 
1806                 return;
1807 
1808         buff->len = sizeof(*t1);
1809         buff->sk = NULL;
1810         buff->dev = dev;
1811         buff->localroute = 0;
1812 
1813         t1 =(struct tcphdr *) buff->data;
1814 
1815         /*
1816          *      Put in the IP header and routing stuff. 
1817          */
1818 
1819         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1820                            sizeof(struct tcphdr),tos,ttl);
1821         if (tmp < 0) 
1822         {
1823                 buff->free = 1;
1824                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1825                 return;
1826         }
1827 
1828         t1 =(struct tcphdr *)((char *)t1 +tmp);
1829         buff->len += tmp;
1830         memcpy(t1, th, sizeof(*t1));
1831 
1832         /*
1833          *      Swap the send and the receive. 
1834          */
1835 
1836         t1->dest = th->source;
1837         t1->source = th->dest;
1838         t1->rst = 1;  
1839         t1->window = 0;
1840   
1841         if(th->ack)
1842         {
1843                 t1->ack = 0;
1844                 t1->seq = th->ack_seq;
1845                 t1->ack_seq = 0;
1846         }
1847         else
1848         {
1849                 t1->ack = 1;
1850                 if(!th->syn)
1851                         t1->ack_seq=htonl(th->seq);
1852                 else
1853                         t1->ack_seq=htonl(th->seq+1);
1854                 t1->seq=0;
1855         }
1856 
1857         t1->syn = 0;
1858         t1->urg = 0;
1859         t1->fin = 0;
1860         t1->psh = 0;
1861         t1->doff = sizeof(*t1)/4;
1862         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1863         prot->queue_xmit(NULL, dev, buff, 1);
1864         tcp_statistics.TcpOutSegs++;
1865 }
1866 
1867 
1868 /*
1869  *      Look for tcp options. Parses everything but only knows about MSS.
1870  *      This routine is always called with the packet containing the SYN.
1871  *      However it may also be called with the ack to the SYN.  So you
1872  *      can't assume this is always the SYN.  It's always called after
1873  *      we have set up sk->mtu to our own MTU.
1874  */
1875  
1876 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1877 {
1878         unsigned char *ptr;
1879         int length=(th->doff*4)-sizeof(struct tcphdr);
1880         int mss_seen = 0;
1881     
1882         ptr = (unsigned char *)(th + 1);
1883   
1884         while(length>0)
1885         {
1886                 int opcode=*ptr++;
1887                 int opsize=*ptr++;
1888                 switch(opcode)
1889                 {
1890                         case TCPOPT_EOL:
1891                                 return;
1892                         case TCPOPT_NOP:
1893                                 length-=2;
1894                                 continue;
1895                         
1896                         default:
1897                                 if(opsize<=2)   /* Avoid silly options looping forever */
1898                                         return;
1899                                 switch(opcode)
1900                                 {
1901                                         case TCPOPT_MSS:
1902                                                 if(opsize==4 && th->syn)
1903                                                 {
1904                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1905                                                         mss_seen = 1;
1906                                                 }
1907                                                 break;
1908                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1909                                 }
1910                                 ptr+=opsize-2;
1911                                 length-=opsize;
1912                 }
1913         }
1914         if (th->syn) 
1915         {
1916                 if (! mss_seen)
1917                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1918         }
1919 #ifdef CONFIG_INET_PCTCP
1920         sk->mss = min(sk->max_window >> 1, sk->mtu);
1921 #else    
1922         sk->mss = min(sk->max_window, sk->mtu);
1923 #endif  
1924 }
1925 
/*
 *      Classful network mask for an address.
 *
 *      dst is an IPv4 address in network byte order.  The result is the
 *      class A or class B mask if the address falls in that class, and
 *      the class C mask for everything else, in network byte order.
 */

static inline unsigned long default_mask(unsigned long dst)
{
        unsigned long host_addr = ntohl(dst);
        unsigned long host_mask = IN_CLASSC_NET;

        if (IN_CLASSA(host_addr))
                host_mask = IN_CLASSA_NET;
        else if (IN_CLASSB(host_addr))
                host_mask = IN_CLASSB_NET;

        return htonl(host_mask);
}
1935 
1936 /*
1937  *      This routine handles a connection request.
1938  *      It should make sure we haven't already responded.
1939  *      Because of the way BSD works, we have to send a syn/ack now.
1940  *      This also means it will be harder to close a socket which is
1941  *      listening.
1942  */
1943  
1944 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1945                  unsigned long daddr, unsigned long saddr,
1946                  struct options *opt, struct device *dev)
1947 {
1948         struct sk_buff *buff;
1949         struct tcphdr *t1;
1950         unsigned char *ptr;
1951         struct sock *newsk;
1952         struct tcphdr *th;
1953         struct device *ndev=NULL;
1954         int tmp;
1955         struct rtable *rt;
1956   
1957         th = skb->h.th;
1958 
1959         /* If the socket is dead, don't accept the connection. */
1960         if (!sk->dead) 
1961         {
1962                 sk->data_ready(sk,0);
1963         }
1964         else 
1965         {
1966                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1967                 tcp_statistics.TcpAttemptFails++;
1968                 kfree_skb(skb, FREE_READ);
1969                 return;
1970         }
1971 
1972         /*
1973          * Make sure we can accept more.  This will prevent a
1974          * flurry of syns from eating up all our memory.
1975          */
1976 
1977         if (sk->ack_backlog >= sk->max_ack_backlog) 
1978         {
1979                 tcp_statistics.TcpAttemptFails++;
1980                 kfree_skb(skb, FREE_READ);
1981                 return;
1982         }
1983 
1984         /*
1985          * We need to build a new sock struct.
1986          * It is sort of bad to have a socket without an inode attached
1987          * to it, but the wake_up's will just wake up the listening socket,
1988          * and if the listening socket is destroyed before this is taken
1989          * off of the queue, this will take care of it.
1990          */
1991 
1992         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1993         if (newsk == NULL) 
1994         {
1995                 /* just ignore the syn.  It will get retransmitted. */
1996                 tcp_statistics.TcpAttemptFails++;
1997                 kfree_skb(skb, FREE_READ);
1998                 return;
1999         }
2000 
2001         memcpy(newsk, sk, sizeof(*newsk));
2002         skb_queue_head_init(&newsk->write_queue);
2003         skb_queue_head_init(&newsk->receive_queue);
2004         newsk->send_head = NULL;
2005         newsk->send_tail = NULL;
2006         skb_queue_head_init(&newsk->back_log);
2007         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2008         newsk->rto = TCP_TIMEOUT_INIT;
2009         newsk->mdev = 0;
2010         newsk->max_window = 0;
2011         newsk->cong_window = 1;
2012         newsk->cong_count = 0;
2013         newsk->ssthresh = 0;
2014         newsk->backoff = 0;
2015         newsk->blog = 0;
2016         newsk->intr = 0;
2017         newsk->proc = 0;
2018         newsk->done = 0;
2019         newsk->partial = NULL;
2020         newsk->pair = NULL;
2021         newsk->wmem_alloc = 0;
2022         newsk->rmem_alloc = 0;
2023         newsk->localroute = sk->localroute;
2024 
2025         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2026 
2027         newsk->err = 0;
2028         newsk->shutdown = 0;
2029         newsk->ack_backlog = 0;
2030         newsk->acked_seq = skb->h.th->seq+1;
2031         newsk->fin_seq = skb->h.th->seq;
2032         newsk->copied_seq = skb->h.th->seq;
2033         newsk->state = TCP_SYN_RECV;
2034         newsk->timeout = 0;
2035         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
2036         newsk->window_seq = newsk->write_seq;
2037         newsk->rcv_ack_seq = newsk->write_seq;
2038         newsk->urg_data = 0;
2039         newsk->retransmits = 0;
2040         newsk->destroy = 0;
2041         init_timer(&newsk->timer);
2042         newsk->timer.data = (unsigned long)newsk;
2043         newsk->timer.function = &net_timer;
2044         newsk->dummy_th.source = skb->h.th->dest;
2045         newsk->dummy_th.dest = skb->h.th->source;
2046         
2047         /*
2048          *      Swap these two, they are from our point of view. 
2049          */
2050          
2051         newsk->daddr = saddr;
2052         newsk->saddr = daddr;
2053 
2054         put_sock(newsk->num,newsk);
2055         newsk->dummy_th.res1 = 0;
2056         newsk->dummy_th.doff = 6;
2057         newsk->dummy_th.fin = 0;
2058         newsk->dummy_th.syn = 0;
2059         newsk->dummy_th.rst = 0;        
2060         newsk->dummy_th.psh = 0;
2061         newsk->dummy_th.ack = 0;
2062         newsk->dummy_th.urg = 0;
2063         newsk->dummy_th.res2 = 0;
2064         newsk->acked_seq = skb->h.th->seq + 1;
2065         newsk->copied_seq = skb->h.th->seq;
2066 
2067         /*
2068          *      Grab the ttl and tos values and use them 
2069          */
2070 
2071         newsk->ip_ttl=sk->ip_ttl;
2072         newsk->ip_tos=skb->ip_hdr->tos;
2073 
2074         /*
2075          *      Use 512 or whatever user asked for 
2076          */
2077 
2078         /*
2079          *      Note use of sk->user_mss, since user has no direct access to newsk 
2080          */
2081 
2082         rt=ip_rt_route(saddr, NULL,NULL);
2083         
2084         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2085                 newsk->window_clamp = rt->rt_window;
2086         else
2087                 newsk->window_clamp = 0;
2088                 
2089         if (sk->user_mss)
2090                 newsk->mtu = sk->user_mss;
2091         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2092                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2093         else 
2094         {
2095 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2096                 if ((saddr ^ daddr) & default_mask(saddr))
2097 #else
2098                 if ((saddr ^ daddr) & dev->pa_mask)
2099 #endif
2100                         newsk->mtu = 576 - HEADER_SIZE;
2101                 else
2102                         newsk->mtu = MAX_WINDOW;
2103         }
2104 
2105         /*
2106          *      But not bigger than device MTU 
2107          */
2108 
2109         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2110 
2111         /*
2112          *      This will min with what arrived in the packet 
2113          */
2114 
2115         tcp_options(newsk,skb->h.th);
2116 
2117         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2118         if (buff == NULL) 
2119         {
2120                 sk->err = -ENOMEM;
2121                 newsk->dead = 1;
2122                 release_sock(newsk);
2123                 kfree_skb(skb, FREE_READ);
2124                 tcp_statistics.TcpAttemptFails++;
2125                 return;
2126         }
2127   
2128         buff->len = sizeof(struct tcphdr)+4;
2129         buff->sk = newsk;
2130         buff->localroute = newsk->localroute;
2131 
2132         t1 =(struct tcphdr *) buff->data;
2133 
2134         /*
2135          *      Put in the IP header and routing stuff. 
2136          */
2137 
2138         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2139                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2140 
2141         /*
2142          *      Something went wrong. 
2143          */
2144 
2145         if (tmp < 0) 
2146         {
2147                 sk->err = tmp;
2148                 buff->free=1;
2149                 kfree_skb(buff,FREE_WRITE);
2150                 newsk->dead = 1;
2151                 release_sock(newsk);
2152                 skb->sk = sk;
2153                 kfree_skb(skb, FREE_READ);
2154                 tcp_statistics.TcpAttemptFails++;
2155                 return;
2156         }
2157 
2158         buff->len += tmp;
2159         t1 =(struct tcphdr *)((char *)t1 +tmp);
2160   
2161         memcpy(t1, skb->h.th, sizeof(*t1));
2162         buff->h.seq = newsk->write_seq;
2163         /*
2164          *      Swap the send and the receive. 
2165          */
2166         t1->dest = skb->h.th->source;
2167         t1->source = newsk->dummy_th.source;
2168         t1->seq = ntohl(newsk->write_seq++);
2169         t1->ack = 1;
2170         newsk->window = tcp_select_window(newsk);
2171         newsk->sent_seq = newsk->write_seq;
2172         t1->window = ntohs(newsk->window);
2173         t1->res1 = 0;
2174         t1->res2 = 0;
2175         t1->rst = 0;
2176         t1->urg = 0;
2177         t1->psh = 0;
2178         t1->syn = 1;
2179         t1->ack_seq = ntohl(skb->h.th->seq+1);
2180         t1->doff = sizeof(*t1)/4+1;
2181         ptr =(unsigned char *)(t1+1);
2182         ptr[0] = 2;
2183         ptr[1] = 4;
2184         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2185         ptr[3] =(newsk->mtu) & 0xff;
2186 
2187         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2188         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2189 
2190         reset_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
2191         skb->sk = newsk;
2192 
2193         /*
2194          *      Charge the sock_buff to newsk. 
2195          */
2196          
2197         sk->rmem_alloc -= skb->mem_len;
2198         newsk->rmem_alloc += skb->mem_len;
2199         
2200         skb_queue_tail(&sk->receive_queue,skb);
2201         sk->ack_backlog++;
2202         release_sock(newsk);
2203         tcp_statistics.TcpOutSegs++;
2204 }
2205 
2206 
2207 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2208 {
2209         struct sk_buff *buff;
2210         struct tcphdr *t1, *th;
2211         struct proto *prot;
2212         struct device *dev=NULL;
2213         int tmp;
2214 
2215         /*
2216          * We need to grab some memory, and put together a FIN, 
2217          * and then put it into the queue to be sent.
2218          */
2219         sk->inuse = 1;
2220         sk->keepopen = 1;
2221         sk->shutdown = SHUTDOWN_MASK;
2222 
2223         if (!sk->dead) 
2224                 sk->state_change(sk);
2225 
2226         if (timeout == 0) 
2227         {
2228                 /*
2229                  *  We need to flush the recv. buffs.  We do this only on the
2230                  *  descriptor close, not protocol-sourced closes, because the
2231                  *  reader process may not have drained the data yet!
2232                  */
2233 
2234                 if (skb_peek(&sk->receive_queue) != NULL) 
2235                 {
2236                         struct sk_buff *skb;
2237                         if(sk->debug)
2238                                 printk("Clean rcv queue\n");
2239                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2240                                 kfree_skb(skb, FREE_READ);
2241                         if(sk->debug)
2242                                 printk("Cleaned.\n");
2243                 }
2244         }
2245 
2246         /*
2247          *      Get rid off any half-completed packets. 
2248          */
2249          
2250         if (sk->partial) 
2251         {
2252                 tcp_send_partial(sk);
2253         }
2254 
2255         switch(sk->state) 
2256         {
2257                 case TCP_FIN_WAIT1:
2258                 case TCP_FIN_WAIT2:
2259                 case TCP_CLOSING:
2260                         /*
2261                          * These states occur when we have already closed out
2262                          * our end.  If there is no timeout, we do not do
2263                          * anything.  We may still be in the middle of sending
2264                          * the remainder of our buffer, for example...
2265                          * resetting the timer would be inappropriate.
2266                          *
2267                          * XXX if retransmit count reaches limit, is tcp_close()
2268                          * called with timeout == 1 ? if not, we need to fix that.
2269                          */
2270                         if (!timeout) {
2271                                 int timer_active;
2272 
2273                                 timer_active = del_timer(&sk->timer);
2274                                 if (timer_active)
2275                                         add_timer(&sk->timer);
2276                                 else
2277                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2278                         }
2279                         if (timeout) 
2280                                 tcp_time_wait(sk);
2281                         release_sock(sk);
2282                         return; /* break causes a double release - messy */
2283                 case TCP_TIME_WAIT:
2284                 case TCP_LAST_ACK:
2285                         /*
2286                          * A timeout from these states terminates the TCB.
2287                          */
2288                         if (timeout) 
2289                         {
2290                                 tcp_set_state(sk,TCP_CLOSE);
2291                         }
2292                         release_sock(sk);
2293                         return;
2294                 case TCP_LISTEN:
2295                         tcp_set_state(sk,TCP_CLOSE);
2296                         release_sock(sk);
2297                         return;
2298                 case TCP_CLOSE:
2299                         release_sock(sk);
2300                         return;
2301                 case TCP_CLOSE_WAIT:
2302                 case TCP_ESTABLISHED:
2303                 case TCP_SYN_SENT:
2304                 case TCP_SYN_RECV:
2305                         prot =(struct proto *)sk->prot;
2306                         th =(struct tcphdr *)&sk->dummy_th;
2307                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2308                         if (buff == NULL) 
2309                         {
2310                                 /* This will force it to try again later. */
2311                                 /* Or it would have if someone released the socket
2312                                    first. Anyway it might work now */
2313                                 release_sock(sk);
2314                                 if (sk->state != TCP_CLOSE_WAIT)
2315                                         tcp_set_state(sk,TCP_ESTABLISHED);
2316                                 reset_timer(sk, TIME_CLOSE, 100);
2317                                 return;
2318                         }
2319                         buff->sk = sk;
2320                         buff->free = 1;
2321                         buff->len = sizeof(*t1);
2322                         buff->localroute = sk->localroute;
2323                         t1 =(struct tcphdr *) buff->data;
2324         
2325                         /*
2326                          *      Put in the IP header and routing stuff. 
2327                          */
2328                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2329                                          IPPROTO_TCP, sk->opt,
2330                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2331                         if (tmp < 0) 
2332                         {
2333                                 sk->write_seq++;        /* Very important 8) */
2334                                 kfree_skb(buff,FREE_WRITE);
2335 
2336                                 /*
2337                                  * Enter FIN_WAIT1 to await completion of
2338                                  * written out data and ACK to our FIN.
2339                                  */
2340 
2341                                 if(sk->state==TCP_ESTABLISHED)
2342                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2343                                 else
2344                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2345                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2346                                 if(timeout)
2347                                         tcp_time_wait(sk);
2348 
2349                                 release_sock(sk);
2350                                 return;
2351                         }
2352 
2353                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2354                         buff->len += tmp;
2355                         buff->dev = dev;
2356                         memcpy(t1, th, sizeof(*t1));
2357                         t1->seq = ntohl(sk->write_seq);
2358                         sk->write_seq++;
2359                         buff->h.seq = sk->write_seq;
2360                         t1->ack = 1;
2361         
2362                         /* 
2363                          *      Ack everything immediately from now on. 
2364                          */
2365 
2366                         sk->delay_acks = 0;
2367                         t1->ack_seq = ntohl(sk->acked_seq);
2368                         t1->window = ntohs(sk->window=tcp_select_window(sk));
2369                         t1->fin = 1;
2370                         t1->rst = 0;
2371                         t1->doff = sizeof(*t1)/4;
2372                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2373 
2374                         tcp_statistics.TcpOutSegs++;
2375         
2376                         if (skb_peek(&sk->write_queue) == NULL) 
2377                         {
2378                                 sk->sent_seq = sk->write_seq;
2379                                 prot->queue_xmit(sk, dev, buff, 0);
2380                         } 
2381                         else 
2382                         {
2383                                 reset_timer(sk, TIME_WRITE, sk->rto);
2384                                 if (buff->next != NULL) 
2385                                 {
2386                                         printk("tcp_close: next != NULL\n");
2387                                         skb_unlink(buff);
2388                                 }
2389                                 skb_queue_tail(&sk->write_queue, buff);
2390                         }
2391 
2392                         /*
2393                          * If established (normal close), enter FIN_WAIT1.
2394                          * If in CLOSE_WAIT, enter LAST_ACK
2395                          * If in CLOSING, remain in CLOSING
2396                          * otherwise enter FIN_WAIT2
2397                          */
2398 
2399                         if (sk->state == TCP_ESTABLISHED)
2400                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2401                         else if (sk->state == TCP_CLOSE_WAIT)
2402                                 tcp_set_state(sk,TCP_LAST_ACK);
2403                         else if (sk->state != TCP_CLOSING)
2404                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2405         }
2406         release_sock(sk);
2407 }
2408 
2409 
2410 /*
2411  * This routine takes stuff off of the write queue,
2412  * and puts it in the xmit queue.
      *
      * Frames are taken from the head of sk->write_queue for as long as
      * all of the following hold:
      *   - the head frame's h.seq is not beyond sk->window_seq,
      *   - either no retransmission is in progress (retransmits == 0 or
      *     the running timer is not TIME_WRITE), or the frame's h.seq is
      *     not beyond rcv_ack_seq,
      *   - fewer than cong_window packets are outstanding.
      *
      * A frame whose h.seq is not beyond rcv_ack_seq is freed instead of
      * being sent.  Every other frame gets a fresh ack_seq, window and
      * checksum and is handed to sk->prot->queue_xmit().
      *
      * Nothing is done for a zapped socket.
2413  */
2414 static void
2415 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2416 {
2417         struct sk_buff *skb;
2418 
2419         /*
2420          *      The bytes will have to remain here. In time closedown will
2421          *      empty the write queue and all will be happy 
2422          */
2423 
2424         if(sk->zapped)
2425                 return;
2426 
2427         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2428                 before(skb->h.seq, sk->window_seq + 1) &&
2429                 (sk->retransmits == 0 ||
2430                  sk->timeout != TIME_WRITE ||
2431                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2432                 && sk->packets_out < sk->cong_window) 
2433         {
2434                 IS_SKB(skb);
2435                 skb_unlink(skb);
2436                 /* See if we really need to send the packet. */
2437                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2438                 {
                             /*
                              * Not beyond what the peer has acked: drop the
                              * frame and let a live writer know about it.
                              */
2439                         sk->retransmits = 0;
2440                         kfree_skb(skb, FREE_WRITE);
2441                         if (!sk->dead) 
2442                                 sk->write_space(sk);
2443                 } 
2444                 else
2445                 {
2446                         struct tcphdr *th;
2447                         struct iphdr *iph;
2448                         int size;
2449 /*
2450  * put in the ack seq and window at this point rather than earlier,
2451  * in order to keep them monotonic.  We really want to avoid taking
2452  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2453  * Ack and window will in general have changed since this packet was put
2454  * on the write queue.
2455  */
                             /*
                              * The IP header follows the device's hard header,
                              * the TCP header follows the IP header (ihl is in
                              * 32-bit words).  size is the number of bytes from
                              * the TCP header to the end of the frame.
                              */
2456                         iph = (struct iphdr *)(skb->data +
2457                                                skb->dev->hard_header_len);
2458                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2459                         size = skb->len - (((unsigned char *) th) - skb->data);
2460                         
                             /*
                              * These values go from host order into the wire
                              * header, hence htonl()/htons().
                              */
2461                         th->ack_seq = htonl(sk->acked_seq);
2462                         th->window = htons(tcp_select_window(sk));
2463 
                             /* The header changed, so the checksum is redone. */
2464                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2465 
2466                         sk->sent_seq = skb->h.seq;
2467                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2468                 }
2469         }
2470 }
2471 
2472 
2473 /*
2474  *      This routine deals with incoming acks, but not outgoing ones.
      *
      *      sk    - socket the segment arrived on
      *      th    - TCP header of the received segment; ack_seq, window and
      *              doff are read from it
      *      saddr - not referenced in this routine
      *      len   - compared with th->doff*4 to see whether the segment holds
      *              more than just the TCP header
      *
      *      Returns 0 when the ack is rejected (it is beyond sent_seq, or it
      *      is older than rcv_ack_seq and the socket is neither ESTABLISHED
      *      nor CLOSE_WAIT).  Returns 1 in every other case, including a
      *      zapped socket.
2475  */
2476 
2477 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2478 {
2479         unsigned long ack;
2480         int flag = 0;
2481 
2482         /* 
2483          * 1 - there was data in packet as well as ack or new data is sent or 
2484          *     in shutdown state
2485          * 2 - data from retransmit queue was acked and removed
2486          * 4 - window shrunk or data from retransmit queue was acked and removed
2487          */
2488 
2489         if(sk->zapped)
2490                 return(1);      /* Dead, cant ack any more so why bother */
2491 
2492         ack = ntohl(th->ack_seq);
             /*
              * Remember the largest window the peer has ever advertised and
              * rederive mss from it, bounded by the mtu.
              */
2493         if (ntohs(th->window) > sk->max_window) 
2494         {
2495                 sk->max_window = ntohs(th->window);
2496 #ifdef CONFIG_INET_PCTCP
2497                 sk->mss = min(sk->max_window>>1, sk->mtu);
2498 #else
2499                 sk->mss = min(sk->max_window, sk->mtu);
2500 #endif  
2501         }
2502 
             /* A retransmit count is dropped while the keepalive timer runs. */
2503         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2504                 sk->retransmits = 0;
2505 
             /*
              * The ack is beyond anything we have sent, or older than what
              * has already been acknowledged.
              */
2506         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2507         {
2508                 if(sk->debug)
2509                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2510                         
2511                 /*
2512                  *      Keepalive processing.
2513                  */
2514                  
2515                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2516                 {
2517                         return(0);
2518                 }
                     /*
                      * An old ack on an ESTABLISHED or CLOSE_WAIT socket: restart
                      * the keepalive timer if that is the timer currently armed.
                      */
2519                 if (sk->keepopen) 
2520                 {
2521                         if(sk->timeout==TIME_KEEPOPEN)
2522                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2523                 }
2524                 return(1);
2525         }
2526 
             /* Flag bit 1: the segment is not exactly one TCP header long. */
2527         if (len != th->doff*4) 
2528                 flag |= 1;
2529 
2530         /* See if our window has been shrunk. */
2531 
2532         if (after(sk->window_seq, ack+ntohs(th->window))) 
2533         {
2534                 /*
2535                  * We may need to move packets from the send queue
2536                  * to the write queue, if the window has been shrunk on us.
2537                  * The RFC says you are not allowed to shrink your window
2538                  * like this, but if the other end does, you must be able
2539                  * to deal with it.
2540                  */
2541                 struct sk_buff *skb;
2542                 struct sk_buff *skb2;
2543                 struct sk_buff *wskb = NULL;
2544         
                     /* Detach the whole retransmit list; it is rebuilt below. */
2545                 skb2 = sk->send_head;
2546                 sk->send_head = NULL;
2547                 sk->send_tail = NULL;
2548         
2549                 flag |= 4;
2550         
2551                 sk->window_seq = ack + ntohs(th->window);
                     /*
                      * Walk the old list (chained through link3) with interrupts
                      * off.  Frames beyond the new window edge are put on the
                      * write queue: the first at its head, later ones via
                      * skb_append() relative to the previously moved frame
                      * (presumably directly after it -- confirm against
                      * skb_append).  All other frames are re-chained onto
                      * send_head/send_tail in their original order.
                      */
2552                 cli();
2553                 while (skb2 != NULL) 
2554                 {
2555                         skb = skb2;
2556                         skb2 = skb->link3;
2557                         skb->link3 = NULL;
2558                         if (after(skb->h.seq, sk->window_seq)) 
2559                         {
2560                                 if (sk->packets_out > 0) 
2561                                         sk->packets_out--;
2562                                 /* We may need to remove this from the dev send list. */
2563                                 if (skb->next != NULL) 
2564                                 {
2565                                         skb_unlink(skb);                                
2566                                 }
2567                                 /* Now add it to the write_queue. */
2568                                 if (wskb == NULL)
2569                                         skb_queue_head(&sk->write_queue,skb);
2570                                 else
2571                                         skb_append(wskb,skb);
2572                                 wskb = skb;
2573                         } 
2574                         else 
2575                         {
2576                                 if (sk->send_head == NULL) 
2577                                 {
2578                                         sk->send_head = skb;
2579                                         sk->send_tail = skb;
2580                                 }
2581                                 else
2582                                 {
2583                                         sk->send_tail->link3 = skb;
2584                                         sk->send_tail = skb;
2585                                 }
2586                                 skb->link3 = NULL;
2587                         }
2588                 }
2589                 sti();
2590         }
2591 
             /*
              * If either end of the retransmit list is missing, treat the list
              * as empty and record that nothing is outstanding.
              */
2592         if (sk->send_tail == NULL || sk->send_head == NULL) 
2593         {
2594                 sk->send_head = NULL;
2595                 sk->send_tail = NULL;
2596                 sk->packets_out= 0;
2597         }
2598 
             /* Window edge implied by this ack plus the advertised window. */
2599         sk->window_seq = ack + ntohs(th->window);
2600 
2601         /* We don't want too many packets out there. */
             /*
              * The congestion window only grows while the TIME_WRITE timer is
              * armed, while it is below 2048, and when this ack advances
              * rcv_ack_seq.
              */
2602         if (sk->timeout == TIME_WRITE && 
2603                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2604         {
2605 /* 
2606  * This is Jacobson's slow start and congestion avoidance. 
2607  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2608  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2609  * counter and increment it once every cwnd times.  It's possible
2610  * that this should be done only if sk->retransmits == 0.  I'm
2611  * interpreting "new data is acked" as including data that has
2612  * been retransmitted but is just now being acked.
2613  */
2614                 if (sk->cong_window < sk->ssthresh)  
2615                   /* 
2616                    *    In "safe" area, increase
2617                    */
2618                         sk->cong_window++;
2619                 else 
2620                 {
2621                   /*
2622                    *    In dangerous area, increase slowly.  In theory this is
2623                    *    sk->cong_window += 1 / sk->cong_window
2624                    */
2625                         if (sk->cong_count >= sk->cong_window) 
2626                         {
2627                                 sk->cong_window++;
2628                                 sk->cong_count = 0;
2629                         }
2630                         else 
2631                                 sk->cong_count++;
2632                 }
2633         }
2634 
2635         sk->rcv_ack_seq = ack;
2636 
2637         /*
2638          * if this ack opens up a zero window, clear backoff.  It was
2639          * being used to time the probes, and is probably far higher than
2640          * it needs to be for normal retransmission.
2641          */
2642 
2643         if (sk->timeout == TIME_PROBE0) 
2644         {
2645                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2646                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2647                 {
2648                         sk->retransmits = 0;
2649                         sk->backoff = 0;
2650                   /*
2651                    *    Recompute rto from rtt.  this eliminates any backoff.
2652                    */
2653 
                             /* Result is clamped to the range 20 .. 120*HZ. */
2654                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2655                         if (sk->rto > 120*HZ)
2656                                 sk->rto = 120*HZ;
2657                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2658                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2659                                                    .2 of a second is going to need huge windows (SIGH) */
2660                                 sk->rto = 20;
2661                 }
2662         }
2663 
2664   /* 
2665    *    See if we can take anything off of the retransmit queue.
2666    */
2667    
             /*
              * Frames are removed from the head of the list for as long as the
              * head frame's h.seq is not beyond ack; the loop stops at the
              * first frame that is.
              */
2668         while(sk->send_head != NULL) 
2669         {
2670                 /* Check for a bug. */
2671                 if (sk->send_head->link3 &&
2672                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2673                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2674                 if (before(sk->send_head->h.seq, ack+1)) 
2675                 {
2676                         struct sk_buff *oskb;   
2677                         if (sk->retransmits) 
2678                         {       
2679                                 /*
2680                                  *      We were retransmitting.  don't count this in RTT est 
2681                                  */
2682                                 flag |= 2;
2683 
2684                                 /*
2685                                  * even though we've gotten an ack, we're still
2686                                  * retransmitting as long as we're sending from
2687                                  * the retransmit queue.  Keeping retransmits non-zero
2688                                  * prevents us from getting new data interspersed with
2689                                  * retransmissions.
2690                                  */
2691 
2692                                 if (sk->send_head->link3)
2693                                         sk->retransmits = 1;
2694                                 else
2695                                         sk->retransmits = 0;
2696                         }
2697                         /*
2698                          * Note that we only reset backoff and rto in the
2699                          * rtt recomputation code.  And that doesn't happen
2700                          * if there were retransmissions in effect.  So the
2701                          * first new packet after the retransmissions is
2702                          * sent with the backoff still in effect.  Not until
2703                          * we get an ack from a non-retransmitted packet do
2704                          * we reset the backoff and rto.  This allows us to deal
2705                          * with a situation where the network delay has increased
2706                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2707                          */
2708 
2709                         /*
2710                          *      We have one less packet out there. 
2711                          */
2712                          
2713                         if (sk->packets_out > 0) 
2714                                 sk->packets_out --;
2715                         /* 
2716                          *      Wake up the process, it can probably write more. 
2717                          */
2718                         if (!sk->dead) 
2719                                 sk->write_space(sk);
2720                         oskb = sk->send_head;
2721 
                             /*
                              * Bit 2 is set above while retransmitting, and below
                              * once any frame has been removed.  So the RTT is only
                              * sampled for the first frame acked by this segment,
                              * and only when no retransmission is in effect.
                              */
2722                         if (!(flag&2)) 
2723                         {
2724                                 long m;
2725         
2726                                 /*
2727                                  *      The following amusing code comes from Jacobson's
2728                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2729                                  *      are scaled versions of rtt and mean deviation.
2730                                  *      This is designed to be as fast as possible 
2731                                  *      m stands for "measurement".
2732                                  */
2733         
2734                                 m = jiffies - oskb->when;  /* RTT */
2735                                 if(m<=0)
2736                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2737                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2738                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2739                                 if (m < 0)
2740                                         m = -m;         /* m is now abs(error) */
2741                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2742                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2743         
2744                                 /*
2745                                  *      Now update timeout.  Note that this removes any backoff.
2746                                  */
2747                          
2748                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2749                                 if (sk->rto > 120*HZ)
2750                                         sk->rto = 120*HZ;
2751                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2752                                         sk->rto = 20;
2753                                 sk->backoff = 0;
2754                         }
2755                         flag |= (2|4);
                             /*
                              * Re-read the list head with interrupts off, unhook it
                              * from the retransmit list (and from any other list it
                              * is still on), then free it.
                              */
2756                         cli();
2757                         oskb = sk->send_head;
2758                         IS_SKB(oskb);
2759                         sk->send_head = oskb->link3;
2760                         if (sk->send_head == NULL) 
2761                         {
2762                                 sk->send_tail = NULL;
2763                         }
2764 
2765                 /*
2766                  *      We may need to remove this from the dev send list. 
2767                  */
2768 
2769                         if (oskb->next)
2770                                 skb_unlink(oskb);
2771                         sti();
2772                         kfree_skb(oskb, FREE_WRITE); /* write. */
2773                         if (!sk->dead) 
2774                                 sk->write_space(sk);
2775                 }
2776                 else
2777                 {
2778                         break;
2779                 }
2780         }
2781 
2782         /*
2783          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2784          * returns non-NULL, we complete ignore the timer stuff in the else
2785          * clause.  We ought to organize the code so that else clause can
2786          * (should) be executed regardless, possibly moving the PROBE timer
2787          * reset over.  The skb_peek() thing should only move stuff to the
2788          * write queue, NOT also manage the timer functions.
2789          */
2790 
2791         /*
2792          * Maybe we can take some stuff off of the write queue,
2793          * and put it onto the xmit queue.
2794          */
2795         if (skb_peek(&sk->write_queue) != NULL) 
2796         {
                     /*
                      * The first test mirrors the loop condition in
                      * tcp_write_xmit() for the head of the write queue.  If it
                      * fails because the head frame lies beyond the window, and
                      * nothing else is pending, the zero-window probe timer is
                      * armed instead.
                      */
2797                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2798                         (sk->retransmits == 0 || 
2799                          sk->timeout != TIME_WRITE ||
2800                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2801                         && sk->packets_out < sk->cong_window) 
2802                 {
2803                         flag |= 1;
2804                         tcp_write_xmit(sk);
2805                 }
2806                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2807                         sk->send_head == NULL &&
2808                         sk->ack_backlog == 0 &&
2809                         sk->state != TCP_TIME_WAIT) 
2810                 {
2811                         reset_timer(sk, TIME_PROBE0, sk->rto);
2812                 }               
2813         }
2814         else
2815         {
2816                 /*
2817                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2818                  * from TCP_CLOSE we don't do anything
2819                  *
2820                  * from anything else, if there is write data (or fin) pending,
2821                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2822                  * a KEEPALIVE timeout, else we delete the timer.
2823                  *
2824                  * We do not set flag for nominal write data, otherwise we may
2825                  * force a state where we start to write itsy bitsy tidbits
2826                  * of data.
2827                  */
2828 
2829                 switch(sk->state) {
2830                 case TCP_TIME_WAIT:
2831                         /*
2832                          * keep us in TIME_WAIT until we stop getting packets,
2833                          * reset the timeout.
2834                          */
2835                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2836                         break;
2837                 case TCP_CLOSE:
2838                         /*
2839                          * don't touch the timer.
2840                          */
2841                         break;
2842                 default:
2843                         /*
2844                          * must check send_head, write_queue, and ack_backlog
2845                          * to determine which timeout to use.
2846                          */
2847                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2848                                 reset_timer(sk, TIME_WRITE, sk->rto);
2849                         } else if (sk->keepopen) {
2850                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2851                         } else {
2852                                 delete_timer(sk);
2853                         }
2854                         break;
2855                 }
                     /*
                      * The section below is only compiled when NOTDEF is defined.
                      * It looks like an earlier form of the timer handling done
                      * by the switch above.
                      */
2856 #ifdef NOTDEF
2857                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2858                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2859                 {
2860                         if (!sk->dead)
2861                                 sk->write_space(sk);
2862                         if (sk->keepopen) {
2863                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2864                         } else {
2865                                 delete_timer(sk);
2866                         }
2867                 }
2868                 else
2869                 {
2870                         if (sk->state != (unsigned char) sk->keepopen) 
2871                         {
2872                                 reset_timer(sk, TIME_WRITE, sk->rto);
2873                         }
2874                         if (sk->state == TCP_TIME_WAIT) 
2875                         {
2876                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2877                         }       
2878                 }
2879 #endif
2880         }
2881 
             /*
              * Nothing outstanding and nothing queued: a pending partial frame
              * can be pushed out now.
              */
2882         if (sk->packets_out == 0 && sk->partial != NULL &&
2883                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2884         {
2885                 flag |= 1;
2886                 tcp_send_partial(sk);
2887         }
2888 
2889         /*
2890          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2891          * we are now waiting for an acknowledge to our FIN.  The other end is
2892          * already in TIME_WAIT.
2893          *
2894          * Move to TCP_CLOSE on success.
2895          */
2896 
2897         if (sk->state == TCP_LAST_ACK) 
2898         {
2899                 if (!sk->dead)
2900                         sk->state_change(sk);
                     /*
                      * Success means everything we wrote has been acked and we
                      * have acked everything up to the peer's FIN.
                      */
2901                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2902                 {
2903                         flag |= 1;
2904                         tcp_set_state(sk,TCP_CLOSE);
2905                         sk->shutdown = SHUTDOWN_MASK;
2906                 }
2907         }
2908 
2909         /*
2910          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2911          *
2912          * Move to FIN_WAIT2 to await a FIN from the other end. Set
2913          * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
2914          */
2915 
2916         if (sk->state == TCP_FIN_WAIT1) 
2917         {
2918 
2919                 if (!sk->dead) 
2920                         sk->state_change(sk);
2921                 if (sk->rcv_ack_seq == sk->write_seq) 
2922                 {
2923                         flag |= 1;
2924                         sk->shutdown |= SEND_SHUTDOWN;
2925                         tcp_set_state(sk,TCP_FIN_WAIT2);
2926                 }
2927         }
2928 
2929         /*
2930          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2931          *
2932          *      Move to TIME_WAIT
2933          */
2934 
2935         if (sk->state == TCP_CLOSING) 
2936         {
2937 
2938                 if (!sk->dead) 
2939                         sk->state_change(sk);
2940                 if (sk->rcv_ack_seq == sk->write_seq) 
2941                 {
2942                         flag |= 1;
2943                         tcp_time_wait(sk);
2944                 }
2945         }
2946 
2947         /*
2948          * I make no guarantees about the first clause in the following
2949          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2950          * what conditions "!flag" would be true.  However I think the rest
2951          * of the conditions would prevent that from causing any
2952          * unnecessary retransmission. 
2953          *   Clearly if the first packet has expired it should be 
2954          * retransmitted.  The other alternative, "flag&2 && retransmits", is
2955          * harder to explain:  You have to look carefully at how and when the
2956          * timer is set and with what timeout.  The most recent transmission always
2957          * sets the timer.  So in general if the most recent thing has timed
2958          * out, everything before it has as well.  So we want to go ahead and
2959          * retransmit some more.  If we didn't explicitly test for this
2960          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2961          * would not be true.  If you look at the pattern of timing, you can
2962          * show that rto is increased fast enough that the next packet would
2963          * almost never be retransmitted immediately.  Then you'd end up
2964          * waiting for a timeout to send each packet on the retransmission
2965          * queue.  With my implementation of the Karn sampling algorithm,
2966          * the timeout would double each time.  The net result is that it would
2967          * take a hideous amount of time to recover from a single dropped packet.
2968          * It's possible that there should also be a test for TIME_WRITE, but
2969          * I think as long as "send_head != NULL" and "retransmit" is on, we've
2970          * got to be in real retransmission mode.
2971          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2972          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2973          * As long as no further losses occur, this seems reasonable.
2974          */
2975         
             /*
              * NOTE(review): "when + rto < jiffies" is a plain comparison and
              * presumably gives the wrong answer once jiffies wraps -- confirm.
              */
2976         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2977                (((flag&2) && sk->retransmits) ||
2978                (sk->send_head->when + sk->rto < jiffies))) 
2979         {
2980                 ip_do_retransmit(sk, 1);
2981                 reset_timer(sk, TIME_WRITE, sk->rto);
2982         }
2983 
2984         return(1);
2985 }
2986 
2987 
2988 /*
2989  *      This routine handles the data.  If there is room in the buffer,
2990  *      it will have already been moved into it.  If there is no
2991  *      room, then we will just have to discard the packet.
2992  */
2993 
2994 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
2995          unsigned long saddr, unsigned short len)
2996 {
2997         struct sk_buff *skb1, *skb2;
2998         struct tcphdr *th;
2999         int dup_dumped=0;
3000         unsigned long new_seq;
3001 
3002         th = skb->h.th;
3003         skb->len = len -(th->doff*4);
3004 
3005         /* The bytes in the receive read/assembly queue has increased. Needed for the
3006            low memory discard algorithm */
3007            
3008         sk->bytes_rcv += skb->len;
3009         
3010         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3011         {
3012                 /* 
3013                  *      Don't want to keep passing ack's back and forth. 
3014                  *      (someone sent us dataless, boring frame)
3015                  */
3016                 if (!th->ack)
3017                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3018                 kfree_skb(skb, FREE_READ);
3019                 return(0);
3020         }
3021         
3022         /*
3023          *      We no longer have anyone receiving data on this connection.
3024          */
3025 
3026         if(sk->shutdown & RCV_SHUTDOWN)
3027         {
3028                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3029                 
3030                 if(after(new_seq,sk->acked_seq+1))      /* If the right edge of this frame is after the last copied byte
3031                                                            then it contains data we will never touch. We send an RST to 
3032                                                            ensure the far end knows it never got to the application */
3033                 {
3034                         sk->acked_seq = new_seq + th->fin;
3035                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3036                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3037                         tcp_statistics.TcpEstabResets++;
3038                         tcp_set_state(sk,TCP_CLOSE);
3039                         sk->err = EPIPE;
3040                         sk->shutdown = SHUTDOWN_MASK;
3041                         kfree_skb(skb, FREE_READ);
3042                         if (!sk->dead)
3043                                 sk->state_change(sk);
3044                         return(0);
3045                 }
3046         }
3047         /*
3048          *      Now we have to walk the chain, and figure out where this one
3049          *      goes into it.  This is set up so that the last packet we received
3050          *      will be the first one we look at, that way if everything comes
3051          *      in order, there will be no performance loss, and if they come
3052          *      out of order we will be able to fit things in nicely.
3053          */
3054 
3055         /* 
3056          *      This should start at the last one, and then go around forwards.
3057          */
3058 
3059         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3060         {
3061                 skb_queue_head(&sk->receive_queue,skb);
3062                 skb1= NULL;
3063         } 
3064         else
3065         {
3066                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3067                 {
3068                         if(sk->debug)
3069                         {
3070                                 printk("skb1=%p :", skb1);
3071                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3072                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3073                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3074                                                 sk->acked_seq);
3075                         }
3076                         
3077                         /*
3078                          *      Optimisation: Duplicate frame or extension of previous frame from
3079                          *      same sequence point (lost ack case).
3080                          *      The frame contains duplicate data or replaces a previous frame
3081                          *      discard the previous frame (safe as sk->inuse is set) and put
3082                          *      the new one in its place.
3083                          */
3084                          
3085                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3086                         {
3087                                 skb_append(skb1,skb);
3088                                 skb_unlink(skb1);
3089                                 kfree_skb(skb1,FREE_READ);
3090                                 dup_dumped=1;
3091                                 skb1=NULL;
3092                                 break;
3093                         }
3094                         
3095                         /*
3096                          *      Found where it fits
3097                          */
3098                          
3099                         if (after(th->seq+1, skb1->h.th->seq))
3100                         {
3101                                 skb_append(skb1,skb);
3102                                 break;
3103                         }
3104                         
3105                         /*
3106                          *      See if we've hit the start. If so insert.
3107                          */
3108                         if (skb1 == skb_peek(&sk->receive_queue))
3109                         {
3110                                 skb_queue_head(&sk->receive_queue, skb);
3111                                 break;
3112                         }
3113                 }
3114         }
3115 
3116         /*
3117          *      Figure out what the ack value for this frame is
3118          */
3119          
3120         th->ack_seq = th->seq + skb->len;
3121         if (th->syn) 
3122                 th->ack_seq++;
3123         if (th->fin)
3124                 th->ack_seq++;
3125 
3126         if (before(sk->acked_seq, sk->copied_seq)) 
3127         {
3128                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3129                 sk->acked_seq = sk->copied_seq;
3130         }
3131 
3132         /*
3133          *      Now figure out if we can ack anything.
3134          */
3135 
3136         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3137         {
3138                 if (before(th->seq, sk->acked_seq+1)) 
3139                 {
3140                         int newwindow;
3141 
3142                         if (after(th->ack_seq, sk->acked_seq)) 
3143                         {
3144                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3145                                 if (newwindow < 0)
3146                                         newwindow = 0;  
3147                                 sk->window = newwindow;
3148                                 sk->acked_seq = th->ack_seq;
3149                         }
3150                         skb->acked = 1;
3151 
3152                         /* 
3153                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3154                          */
3155 
3156                         if (skb->h.th->fin) 
3157                         {
3158                                 if (!sk->dead) 
3159                                         sk->state_change(sk);
3160                                 sk->shutdown |= RCV_SHUTDOWN;
3161                         }
3162           
3163                         for(skb2 = skb->next;
3164                             skb2 != (struct sk_buff *)&sk->receive_queue;
3165                             skb2 = skb2->next) 
3166                         {
3167                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3168                                 {
3169                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3170                                         {
3171                                                 newwindow = sk->window -
3172                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3173                                                 if (newwindow < 0)
3174                                                         newwindow = 0;  
3175                                                 sk->window = newwindow;
3176                                                 sk->acked_seq = skb2->h.th->ack_seq;
3177                                         }
3178                                         skb2->acked = 1;
3179                                         /*
3180                                          *      When we ack the fin, we turn on
3181                                          *      the RCV_SHUTDOWN flag.
3182                                          */
3183                                         if (skb2->h.th->fin) 
3184                                         {
3185                                                 sk->shutdown |= RCV_SHUTDOWN;
3186                                                 if (!sk->dead)
3187                                                         sk->state_change(sk);
3188                                         }
3189 
3190                                         /*
3191                                          *      Force an immediate ack.
3192                                          */
3193                                          
3194                                         sk->ack_backlog = sk->max_ack_backlog;
3195                                 }
3196                                 else
3197                                 {
3198                                         break;
3199                                 }
3200                         }
3201 
3202                         /*
3203                          *      This also takes care of updating the window.
3204                          *      This if statement needs to be simplified.
3205                          */
3206                         if (!sk->delay_acks ||
3207                             sk->ack_backlog >= sk->max_ack_backlog || 
3208                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3209         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3210                         }
3211                         else 
3212                         {
3213                                 sk->ack_backlog++;
3214                                 if(sk->debug)
3215                                         printk("Ack queued.\n");
3216                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3217                         }
3218                 }
3219         }
3220 
3221         /*
3222          *      If we've missed a packet, send an ack.
3223          *      Also start a timer to send another.
3224          */
3225          
3226         if (!skb->acked) 
3227         {
3228         
3229         /*
3230          *      This is important.  If we don't have much room left,
3231          *      we need to throw out a few packets so we have a good
3232          *      window.  Note that mtu is used, not mss, because mss is really
3233          *      for the send side.  He could be sending us stuff as large as mtu.
3234          */
3235                  
3236                 while (sk->prot->rspace(sk) < sk->mtu) 
3237                 {
3238                         skb1 = skb_peek(&sk->receive_queue);
3239                         if (skb1 == NULL) 
3240                         {
3241                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3242                                 break;
3243                         }
3244 
3245                         /*
3246                          *      Don't throw out something that has been acked. 
3247                          */
3248                  
3249                         if (skb1->acked) 
3250                         {
3251                                 break;
3252                         }
3253                 
3254                         skb_unlink(skb1);
3255                         kfree_skb(skb1, FREE_READ);
3256                 }
3257                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3258                 sk->ack_backlog++;
3259                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3260         }
3261         else
3262         {
3263                 /* We missed a packet.  Send an ack to try to resync things. */
3264                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3265         }
3266 
3267         /*
3268          *      Now tell the user we may have some data. 
3269          */
3270          
3271         if (!sk->dead) 
3272         {
3273                 if(sk->debug)
3274                         printk("Data wakeup.\n");
3275                 sk->data_ready(sk,0);
3276         } 
3277         return(0);
3278 }
3279 
3280 
3281 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3282 {
3283         unsigned long ptr = ntohs(th->urg_ptr);
3284 
3285         if (ptr)
3286                 ptr--;
3287         ptr += th->seq;
3288 
3289         /* ignore urgent data that we've already seen and read */
3290         if (after(sk->copied_seq+1, ptr))
3291                 return;
3292 
3293         /* do we already have a newer (or duplicate) urgent pointer? */
3294         if (sk->urg_data && !after(ptr, sk->urg_seq))
3295                 return;
3296 
3297         /* tell the world about our new urgent pointer */
3298         if (sk->proc != 0) {
3299                 if (sk->proc > 0) {
3300                         kill_proc(sk->proc, SIGURG, 1);
3301                 } else {
3302                         kill_pg(-sk->proc, SIGURG, 1);
3303                 }
3304         }
3305         sk->urg_data = URG_NOTYET;
3306         sk->urg_seq = ptr;
3307 }
3308 
3309 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3310         unsigned long saddr, unsigned long len)
3311 {
3312         unsigned long ptr;
3313 
3314         /* check if we get a new urgent pointer */
3315         if (th->urg)
3316                 tcp_check_urg(sk,th);
3317 
3318         /* do we wait for any urgent data? */
3319         if (sk->urg_data != URG_NOTYET)
3320                 return 0;
3321 
3322         /* is the urgent pointer pointing into this packet? */
3323         ptr = sk->urg_seq - th->seq + th->doff*4;
3324         if (ptr >= len)
3325                 return 0;
3326 
3327         /* ok, got the correct packet, update info */
3328         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3329         if (!sk->dead)
3330                 sk->data_ready(sk,0);
3331         return 0;
3332 }
3333 
3334 
3335 /*
3336  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3337  *
3338  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3339  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3340  *  TIME-WAIT)
3341  *
3342  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3343  *  close and we go into CLOSING (and later onto TIME-WAIT)
3344  *
3345  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3346  *
3347  */
3348 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3349          unsigned long saddr, struct device *dev)
3350 {
3351         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3352 
3353         if (!sk->dead) 
3354         {
3355                 sk->state_change(sk);
3356         }
3357 
3358         switch(sk->state) 
3359         {
3360                 case TCP_SYN_RECV:
3361                 case TCP_SYN_SENT:
3362                 case TCP_ESTABLISHED:
3363                         /*
3364                          * move to CLOSE_WAIT, tcp_data() already handled
3365                          * sending the ack.
3366                          */
3367                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3368                         tcp_set_state(sk,TCP_CLOSE_WAIT);
3369                         if (th->rst)
3370                                 sk->shutdown = SHUTDOWN_MASK;
3371                         break;
3372 
3373                 case TCP_CLOSE_WAIT:
3374                 case TCP_CLOSING:
3375                         /*
3376                          * received a retransmission of the FIN, do
3377                          * nothing.
3378                          */
3379                         break;
3380                 case TCP_TIME_WAIT:
3381                         /*
3382                          * received a retransmission of the FIN,
3383                          * restart the TIME_WAIT timer.
3384                          */
3385                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3386                         return(0);
3387                 case TCP_FIN_WAIT1:
3388                         /*
3389                          * This case occurs when a simultaneous close
3390                          * happens, we must ack the received FIN and
3391                          * enter the CLOSING state.
3392                          *
3393                          * XXX timeout not set properly
3394                          */
3395 
3396                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3397                         tcp_set_state(sk,TCP_CLOSING);
3398                         break;
3399                 case TCP_FIN_WAIT2:
3400                         /*
3401                          * received a FIN -- send ACK and enter TIME_WAIT
3402                          */
3403                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3404                         sk->shutdown|=SHUTDOWN_MASK;
3405                         tcp_set_state(sk,TCP_TIME_WAIT);
3406                         break;
3407                 case TCP_CLOSE:
3408                         /*
3409                          * already in CLOSE
3410                          */
3411                         break;
3412                 default:
3413                         tcp_set_state(sk,TCP_LAST_ACK);
3414         
3415                         /* Start the timers. */
3416                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3417                         return(0);
3418         }
3419         sk->ack_backlog++;
3420 
3421         return(0);
3422 }
3423 
3424 
3425 /* This will accept the next outstanding connection. */
3426 static struct sock *
3427 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3428 {
3429         struct sock *newsk;
3430         struct sk_buff *skb;
3431   
3432   /*
3433    * We need to make sure that this socket is listening,
3434    * and that it has something pending.
3435    */
3436 
3437         if (sk->state != TCP_LISTEN) 
3438         {
3439                 sk->err = EINVAL;
3440                 return(NULL); 
3441         }
3442 
3443         /* Avoid the race. */
3444         cli();
3445         sk->inuse = 1;
3446 
3447         while((skb = tcp_dequeue_established(sk)) == NULL) 
3448         {
3449                 if (flags & O_NONBLOCK) 
3450                 {
3451                         sti();
3452                         release_sock(sk);
3453                         sk->err = EAGAIN;
3454                         return(NULL);
3455                 }
3456 
3457                 release_sock(sk);
3458                 interruptible_sleep_on(sk->sleep);
3459                 if (current->signal & ~current->blocked) 
3460                 {
3461                         sti();
3462                         sk->err = ERESTARTSYS;
3463                         return(NULL);
3464                 }
3465                 sk->inuse = 1;
3466         }
3467         sti();
3468 
3469         /*
3470          *      Now all we need to do is return skb->sk. 
3471          */
3472 
3473         newsk = skb->sk;
3474 
3475         kfree_skb(skb, FREE_READ);
3476         sk->ack_backlog--;
3477         release_sock(sk);
3478         return(newsk);
3479 }
3480 
3481 
3482 /*
3483  *      This will initiate an outgoing connection. 
3484  */
3485  
3486 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3487 {
3488         struct sk_buff *buff;
3489         struct device *dev=NULL;
3490         unsigned char *ptr;
3491         int tmp;
3492         struct tcphdr *t1;
3493         struct rtable *rt;
3494 
3495         if (sk->state != TCP_CLOSE) 
3496                 return(-EISCONN);
3497 
3498         if (addr_len < 8) 
3499                 return(-EINVAL);
3500 
3501         if (usin->sin_family && usin->sin_family != AF_INET) 
3502                 return(-EAFNOSUPPORT);
3503 
3504         /*
3505          *      connect() to INADDR_ANY means loopback (BSD'ism).
3506          */
3507         
3508         if(usin->sin_addr.s_addr==INADDR_ANY)
3509                 usin->sin_addr.s_addr=ip_my_addr();
3510                   
3511         /*
3512          *      Don't want a TCP connection going to a broadcast address 
3513          */
3514 
3515         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3516         { 
3517                 return -ENETUNREACH;
3518         }
3519   
3520         /*
3521          *      Connect back to the same socket: Blows up so disallow it 
3522          */
3523 
3524         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3525                 return -EBUSY;
3526 
3527         sk->inuse = 1;
3528         sk->daddr = usin->sin_addr.s_addr;
3529         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3530         sk->window_seq = sk->write_seq;
3531         sk->rcv_ack_seq = sk->write_seq -1;
3532         sk->err = 0;
3533         sk->dummy_th.dest = usin->sin_port;
3534         release_sock(sk);
3535 
3536         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3537         if (buff == NULL) 
3538         {
3539                 return(-ENOMEM);
3540         }
3541         sk->inuse = 1;
3542         buff->len = 24;
3543         buff->sk = sk;
3544         buff->free = 1;
3545         buff->localroute = sk->localroute;
3546         
3547         t1 = (struct tcphdr *) buff->data;
3548 
3549         /*
3550          *      Put in the IP header and routing stuff. 
3551          */
3552          
3553         rt=ip_rt_route(sk->daddr, NULL, NULL);
3554         
3555 
3556         /*
3557          *      We need to build the routing stuff from the things saved in skb. 
3558          */
3559 
3560         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3561                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3562         if (tmp < 0) 
3563         {
3564                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3565                 release_sock(sk);
3566                 return(-ENETUNREACH);
3567         }
3568 
3569         buff->len += tmp;
3570         t1 = (struct tcphdr *)((char *)t1 +tmp);
3571 
3572         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3573         t1->seq = ntohl(sk->write_seq++);
3574         sk->sent_seq = sk->write_seq;
3575         buff->h.seq = sk->write_seq;
3576         t1->ack = 0;
3577         t1->window = 2;
3578         t1->res1=0;
3579         t1->res2=0;
3580         t1->rst = 0;
3581         t1->urg = 0;
3582         t1->psh = 0;
3583         t1->syn = 1;
3584         t1->urg_ptr = 0;
3585         t1->doff = 6;
3586         /* use 512 or whatever user asked for */
3587         
3588         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3589                 sk->window_clamp=rt->rt_window;
3590         else
3591                 sk->window_clamp=0;
3592 
3593         if (sk->user_mss)
3594                 sk->mtu = sk->user_mss;
3595         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3596                 sk->mtu = rt->rt_mss;
3597         else 
3598         {
3599 #ifdef CONFIG_INET_SNARL
3600                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3601 #else
3602                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3603 #endif
3604                         sk->mtu = 576 - HEADER_SIZE;
3605                 else
3606                         sk->mtu = MAX_WINDOW;
3607         }
3608         /*
3609          *      but not bigger than device MTU 
3610          */
3611 
3612         if(sk->mtu <32)
3613                 sk->mtu = 32;   /* Sanity limit */
3614                 
3615         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3616         
3617         /*
3618          *      Put in the TCP options to say MTU. 
3619          */
3620 
3621         ptr = (unsigned char *)(t1+1);
3622         ptr[0] = 2;
3623         ptr[1] = 4;
3624         ptr[2] = (sk->mtu) >> 8;
3625         ptr[3] = (sk->mtu) & 0xff;
3626         tcp_send_check(t1, sk->saddr, sk->daddr,
3627                   sizeof(struct tcphdr) + 4, sk);
3628 
3629         /*
3630          *      This must go first otherwise a really quick response will get reset. 
3631          */
3632 
3633         tcp_set_state(sk,TCP_SYN_SENT);
3634         sk->rto = TCP_TIMEOUT_INIT;
3635         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3636         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3637 
3638         sk->prot->queue_xmit(sk, dev, buff, 0);  
3639         tcp_statistics.TcpActiveOpens++;
3640         tcp_statistics.TcpOutSegs++;
3641   
3642         release_sock(sk);
3643         return(0);
3644 }
3645 
3646 
3647 /* This functions checks to see if the tcp header is actually acceptable. */
3648 static int
3649 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3650              struct options *opt, unsigned long saddr, struct device *dev)
3651 {
3652         unsigned long next_seq;
3653 
3654         next_seq = len - 4*th->doff;
3655         if (th->fin)
3656                 next_seq++;
3657         /* if we have a zero window, we can't have any data in the packet.. */
3658         if (next_seq && !sk->window)
3659                 goto ignore_it;
3660         next_seq += th->seq;
3661 
3662         /*
3663          * This isn't quite right.  sk->acked_seq could be more recent
3664          * than sk->window.  This is however close enough.  We will accept
3665          * slightly more packets than we should, but it should not cause
3666          * problems unless someone is trying to forge packets.
3667          */
3668 
3669         /* have we already seen all of this packet? */
3670         if (!after(next_seq+1, sk->acked_seq))
3671                 goto ignore_it;
3672         /* or does it start beyond the window? */
3673         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3674                 goto ignore_it;
3675 
3676         /* ok, at least part of this packet would seem interesting.. */
3677         return 1;
3678 
3679 ignore_it:
3680         if (th->rst)
3681                 return 0;
3682 
3683         /*
3684          *      Send a reset if we get something not ours and we are
3685          *      unsynchronized. Note: We don't do anything to our end. We
3686          *      are just killing the bogus remote connection then we will
3687          *      connect again and it will work (with luck).
3688          */
3689          
3690         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3691                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3692                 return 1;
3693         }
3694 
3695         /* Try to resync things. */
3696         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3697         return 0;
3698 }
3699 
3700 
3701 #ifdef TCP_FASTPATH
3702 /*
3703  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3704  *      Yes if
3705  *      a) The queue is empty
3706  *      b) The last frame on the queue has the acked flag set
3707  */
3708 
3709 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3710 {
3711         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3712         if(skb==NULL || sk->receive_queue.prev->acked)
3713                 return 1;
3714 }
3715 
3716 #endif
3717 
3718 int
3719 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3720         unsigned long daddr, unsigned short len,
3721         unsigned long saddr, int redo, struct inet_protocol * protocol)
3722 {
3723         struct tcphdr *th;
3724         struct sock *sk;
3725 
3726         if (!skb) 
3727         {
3728                 return(0);
3729         }
3730 
3731         if (!dev) 
3732         {
3733                 return(0);
3734         }
3735   
3736         tcp_statistics.TcpInSegs++;
3737   
3738         if(skb->pkt_type!=PACKET_HOST)
3739         {
3740                 kfree_skb(skb,FREE_READ);
3741                 return(0);
3742         }
3743   
3744         th = skb->h.th;
3745 
3746         /*
3747          *      Find the socket.
3748          */
3749 
3750         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3751 
3752         /*
3753          *      If this socket has got a reset its to all intents and purposes 
3754          *      really dead 
3755          */
3756          
3757         if (sk!=NULL && sk->zapped)
3758                 sk=NULL;
3759 
3760         if (!redo) 
3761         {
3762                 if (tcp_check(th, len, saddr, daddr )) 
3763                 {
3764                         skb->sk = NULL;
3765                         kfree_skb(skb,FREE_READ);
3766                         /*
3767                          * We don't release the socket because it was
3768                          * never marked in use.
3769                          */
3770                         return(0);
3771                 }
3772                 th->seq = ntohl(th->seq);
3773 
3774                 /* See if we know about the socket. */
3775                 if (sk == NULL) 
3776                 {
3777                         if (!th->rst)
3778                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3779                         skb->sk = NULL;
3780                         kfree_skb(skb, FREE_READ);
3781                         return(0);
3782                 }
3783 
3784                 skb->len = len;
3785                 skb->sk = sk;
3786                 skb->acked = 0;
3787                 skb->used = 0;
3788                 skb->free = 0;
3789                 skb->saddr = daddr;
3790                 skb->daddr = saddr;
3791         
3792                 /* We may need to add it to the backlog here. */
3793                 cli();
3794                 if (sk->inuse) 
3795                 {
3796                         skb_queue_head(&sk->back_log, skb);
3797                         sti();
3798                         return(0);
3799                 }
3800                 sk->inuse = 1;
3801                 sti();
3802         }
3803         else
3804         {
3805                 if (!sk) 
3806                 {
3807                         return(0);
3808                 }
3809         }
3810 
3811 
3812         if (!sk->prot) 
3813         {
3814                 return(0);
3815         }
3816 
3817 
3818         /*
3819          *      Charge the memory to the socket. 
3820          */
3821          
3822         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3823         {
3824                 skb->sk = NULL;
3825                 kfree_skb(skb, FREE_READ);
3826                 release_sock(sk);
3827                 return(0);
3828         }
3829 
3830         sk->rmem_alloc += skb->mem_len;
3831 
3832 #ifdef TCP_FASTPATH
3833 /*
3834  *      Incoming data stream fastpath. 
3835  *
3836  *      We try to optimise two things.
3837  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3838  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3839  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3840  *
3841  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3842  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3843  *      speed although further optimizing here is possible.
3844  */
3845  
3846         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3847         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3848         {       
3849                 /* Packets in order. Fits window */
3850                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3851                 {
3852                         /* Ack is harder */
3853                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3854                         {
3855                                 kfree_skb(skb, FREE_READ);
3856                                 release_sock(sk);
3857                                 return 0;
3858                         }
3859                         /*
3860                          *      Set up variables
3861                          */
3862                         skb->len -= (th->doff *4);
3863                         sk->bytes_rcv += skb->len;
3864                         tcp_rx_hit2++;
3865                         if(skb->len)
3866                         {
3867                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3868                                 if(sk->window >= skb->len)
3869                                         sk->window-=skb->len;                   /* We know its effect on the window */
3870                                 else
3871                                         sk->window=0;
3872                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3873                                 skb->acked=1;                           /* Guaranteed true */
3874                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3875                                         sk->bytes_rcv > sk->max_unacked)
3876                                 {
3877                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3878                                 }
3879                                 else
3880                                 {
3881                                         sk->ack_backlog++;
3882                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3883                                 }
3884                                 if(!sk->dead)
3885                                         sk->data_ready(sk,0);
3886                                 release_sock(sk);
3887                                 return 0;
3888                         }
3889                 }
3890                 /*
3891                  *      More generic case of arriving data stream in ESTABLISHED
3892                  */
3893                 tcp_rx_hit1++;
3894                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3895                 {
3896                         kfree_skb(skb, FREE_READ);
3897                         release_sock(sk);
3898                         return 0;
3899                 }
3900                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3901                 {
3902                         kfree_skb(skb, FREE_READ);
3903                         release_sock(sk);
3904                         return 0;
3905                 }
3906                 if(tcp_data(skb, sk, saddr, len))
3907                         kfree_skb(skb, FREE_READ);
3908                 release_sock(sk);
3909                 return 0;
3910         }
3911         tcp_rx_miss++;
3912 #endif  
3913 
3914         /*
3915          *      Now deal with all cases.
3916          */
3917          
3918         switch(sk->state) 
3919         {
3920         
3921                 /*
3922                  * This should close the system down if it's waiting
3923                  * for an ack that is never going to be sent.
3924                  */
3925                 case TCP_LAST_ACK:
3926                         if (th->rst) 
3927                         {
3928                                 sk->zapped=1;
3929                                 sk->err = ECONNRESET;
3930                                 tcp_set_state(sk,TCP_CLOSE);
3931                                 sk->shutdown = SHUTDOWN_MASK;
3932                                 if (!sk->dead) 
3933                                 {
3934                                         sk->state_change(sk);
3935                                 }
3936                                 kfree_skb(skb, FREE_READ);
3937                                 release_sock(sk);
3938                                 return(0);
3939                         }
3940 
3941                 case TCP_ESTABLISHED:
3942                 case TCP_CLOSE_WAIT:
3943                 case TCP_CLOSING:
3944                 case TCP_FIN_WAIT1:
3945                 case TCP_FIN_WAIT2:
3946                 case TCP_TIME_WAIT:
3947                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3948                         {
3949                                 kfree_skb(skb, FREE_READ);
3950                                 release_sock(sk);
3951                                 return(0);
3952                         }
3953 
3954                         if (th->rst) 
3955                         {
3956                                 tcp_statistics.TcpEstabResets++;
3957                                 sk->zapped=1;
3958                                 /* This means the thing should really be closed. */
3959                                 sk->err = ECONNRESET;
3960                                 if (sk->state == TCP_CLOSE_WAIT) 
3961                                 {
3962                                         sk->err = EPIPE;
3963                                 }
3964         
3965                                 /*
3966                                  * A reset with a fin just means that
3967                                  * the data was not all read.
3968                                  */
3969                                 tcp_set_state(sk,TCP_CLOSE);
3970                                 sk->shutdown = SHUTDOWN_MASK;
3971                                 if (!sk->dead) 
3972                                 {
3973                                         sk->state_change(sk);
3974                                 }
3975                                 kfree_skb(skb, FREE_READ);
3976                                 release_sock(sk);
3977                                 return(0);
3978                         }
3979                         if (th->syn) 
3980                         {
3981                                 tcp_statistics.TcpEstabResets++;
3982                                 sk->err = ECONNRESET;
3983                                 tcp_set_state(sk,TCP_CLOSE);
3984                                 sk->shutdown = SHUTDOWN_MASK;
3985                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3986                                 if (!sk->dead) {
3987                                         sk->state_change(sk);
3988                                 }
3989                                 kfree_skb(skb, FREE_READ);
3990                                 release_sock(sk);
3991                                 return(0);
3992                         }
3993         
3994                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
3995                                 kfree_skb(skb, FREE_READ);
3996                                 release_sock(sk);
3997                                 return(0);
3998                         }
3999         
4000                         if (tcp_urg(sk, th, saddr, len)) {
4001                                 kfree_skb(skb, FREE_READ);
4002                                 release_sock(sk);
4003                                 return(0);
4004                         }
4005 
4006         
4007                         if (tcp_data(skb, sk, saddr, len)) {
4008                                 kfree_skb(skb, FREE_READ);
4009                                 release_sock(sk);
4010                                 return(0);
4011                         }       
4012 
4013                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4014                                 kfree_skb(skb, FREE_READ);
4015                                 release_sock(sk);
4016                                 return(0);
4017                         }
4018         
4019                         release_sock(sk);
4020                         return(0);
4021                 
4022                 case TCP_CLOSE:
4023                         if (sk->dead || sk->daddr) {
4024                                 kfree_skb(skb, FREE_READ);
4025                                         release_sock(sk);
4026                                 return(0);
4027                         }
4028         
4029                         if (!th->rst) {
4030                                 if (!th->ack)
4031                                         th->ack_seq = 0;
4032                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4033                         }
4034                         kfree_skb(skb, FREE_READ);
4035                         release_sock(sk);
4036                                 return(0);
4037         
4038                 case TCP_LISTEN:
4039                         if (th->rst) {
4040                                 kfree_skb(skb, FREE_READ);
4041                                 release_sock(sk);
4042                                 return(0);
4043                         }
4044                         if (th->ack) {
4045                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4046                                 kfree_skb(skb, FREE_READ);
4047                                 release_sock(sk);
4048                                 return(0);
4049                         }
4050         
4051                         if (th->syn) 
4052                         {
4053                                 /*
4054                                  * Now we just put the whole thing including
4055                                  * the header and saddr, and protocol pointer
4056                                  * into the buffer.  We can't respond until the
4057                                  * user tells us to accept the connection.
4058                                  */
4059                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4060                                 release_sock(sk);
4061                                 return(0);
4062                         }
4063 
4064                         kfree_skb(skb, FREE_READ);
4065                         release_sock(sk);
4066                         return(0);
4067 
4068                 case TCP_SYN_RECV:
4069                         if (th->syn) {
4070                                 /* Probably a retransmitted syn */
4071                                 kfree_skb(skb, FREE_READ);
4072                                 release_sock(sk);
4073                                 return(0);
4074                         }
4075         
4076         
4077                 default:
4078                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4079                         {
4080                                 kfree_skb(skb, FREE_READ);
4081                                 release_sock(sk);
4082                                 return(0);
4083                         }
4084         
4085                 case TCP_SYN_SENT:
4086                         if (th->rst) 
4087                         {
4088                                 tcp_statistics.TcpAttemptFails++;
4089                                 sk->err = ECONNREFUSED;
4090                                 tcp_set_state(sk,TCP_CLOSE);
4091                                 sk->shutdown = SHUTDOWN_MASK;
4092                                 sk->zapped = 1;
4093                                 if (!sk->dead) 
4094                                 {
4095                                         sk->state_change(sk);
4096                                 }
4097                                 kfree_skb(skb, FREE_READ);
4098                                 release_sock(sk);
4099                                 return(0);
4100                         }
4101                         if (!th->ack) 
4102                         {
4103                                 if (th->syn) 
4104                                 {
4105                                         tcp_set_state(sk,TCP_SYN_RECV);
4106                                 }
4107                                 kfree_skb(skb, FREE_READ);
4108                                 release_sock(sk);
4109                                 return(0);
4110                         }
4111         
4112                         switch(sk->state) 
4113                         {
4114                                 case TCP_SYN_SENT:
4115                                         if (!tcp_ack(sk, th, saddr, len)) 
4116                                         {
4117                                                 tcp_statistics.TcpAttemptFails++;
4118                                                 tcp_reset(daddr, saddr, th,
4119                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4120                                                 kfree_skb(skb, FREE_READ);
4121                                                         release_sock(sk);
4122                                                 return(0);
4123                                         }
4124         
4125                                         /*
4126                                          * If the syn bit is also set, switch to
4127                                          * tcp_syn_recv, and then to established.
4128                                          */
4129                                         if (!th->syn) 
4130                                         {
4131                                                 kfree_skb(skb, FREE_READ);
4132                                                 release_sock(sk);
4133                                                 return(0);
4134                                         }
4135         
4136                                         /* Ack the syn and fall through. */
4137                                         sk->acked_seq = th->seq+1;
4138                                         sk->fin_seq = th->seq;
4139                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4140                                                 sk, th, sk->daddr);
4141                 
4142                                 case TCP_SYN_RECV:
4143                                         if (!tcp_ack(sk, th, saddr, len)) 
4144                                         {
4145                                                 tcp_statistics.TcpAttemptFails++;
4146                                                 tcp_reset(daddr, saddr, th,
4147                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4148                                                 kfree_skb(skb, FREE_READ);
4149                                                 release_sock(sk);
4150                                                 return(0);
4151                                         }
4152         
4153                                         tcp_set_state(sk,TCP_ESTABLISHED);
4154         
4155                                         /*
4156                                          *      Now we need to finish filling out
4157                                          *      some of the tcp header.
4158                                          * 
4159                                          *      We need to check for mtu info. 
4160                                          */
4161                                         tcp_options(sk, th);
4162                                         sk->dummy_th.dest = th->source;
4163                                         sk->copied_seq = sk->acked_seq-1;
4164                                         if (!sk->dead) 
4165                                         {
4166                                                 sk->state_change(sk);
4167                                         }
4168         
4169                                         /*
4170                                          * We've already processed his first
4171                                          * ack.  In just about all cases that
4172                                          * will have set max_window.  This is
4173                                          * to protect us against the possibility
4174                                          * that the initial window he sent was 0.
4175                                          * This must occur after tcp_options, which
4176                                          * sets sk->mtu.
4177                                          */
4178                                         if (sk->max_window == 0) 
4179                                         {
4180                                                 sk->max_window = 32;
4181                                                 sk->mss = min(sk->max_window, sk->mtu);
4182                                         }
4183 
4184                                         /*
4185                                          * Now process the rest like we were
4186                                          * already in the established state.
4187                                          */
4188                                         if (th->urg) 
4189                                         {
4190                                                 if (tcp_urg(sk, th, saddr, len)) 
4191                                                 { 
4192                                                         kfree_skb(skb, FREE_READ);
4193                                                         release_sock(sk);
4194                                                         return(0);
4195                                                 }
4196                                         }
4197                                         if (tcp_data(skb, sk, saddr, len))
4198                                                 kfree_skb(skb, FREE_READ);
4199 
4200                                         if (th->fin)
4201                                                 tcp_fin(skb, sk, th, saddr, dev);
4202                                         release_sock(sk);
4203                                         return(0);
4204                         }
4205         
4206                         if (th->urg) 
4207                         {
4208                                 if (tcp_urg(sk, th, saddr, len)) 
4209                                 {
4210                                         kfree_skb(skb, FREE_READ);
4211                                         release_sock(sk);
4212                                         return(0);
4213                                 }
4214                         }
4215                         if (tcp_data(skb, sk, saddr, len)) 
4216                         {
4217                                 kfree_skb(skb, FREE_READ);
4218                                 release_sock(sk);
4219                                 return(0);
4220                         }
4221         
4222                         if (!th->fin) 
4223                         {
4224                                 release_sock(sk);
4225                                 return(0);
4226                         }
4227                         tcp_fin(skb, sk, th, saddr, dev);
4228                         release_sock(sk);
4229                         return(0);
4230         }
4231 }
4232 
4233 
4234 /*
4235  * This routine sends a packet with an out of date sequence
4236  * number. It assumes the other end will try to ack it.
4237  */
4238 
4239 static void tcp_write_wakeup(struct sock *sk)
     /*
      * Send a data-less ACK segment carrying the already-used sequence
      * number sent_seq-1, which should make the other end answer with an
      * ACK of its own.
      *
      * sk: socket to send from.
      *
      * Nothing is sent, and nothing is reported to the caller, when
      *   - sk->zapped is set,
      *   - sk->state is not ESTABLISHED, CLOSE_WAIT, FIN_WAIT1, LAST_ACK
      *     or CLOSING,
      *   - no buffer can be allocated, or
      *   - the protocol's build_header hook returns a negative value
      *     (the buffer is handed back through wfree in that case).
      */
4240 {
4241         struct sk_buff *buff;
4242         struct tcphdr *t1;
4243         struct device *dev=NULL;
4244         int tmp;
4245 
4246         if (sk->zapped)
4247                 return; /* After a valid reset we can send no more */
4248 
4249         /*
4250          * Write data can still be transmitted/retransmitted in the
4251          * following states.  If any other state is encountered, return.
4252          */
4253 
4254         if (sk->state != TCP_ESTABLISHED && 
4255             sk->state != TCP_CLOSE_WAIT &&
4256             sk->state != TCP_FIN_WAIT1 && 
4257             sk->state != TCP_LAST_ACK &&
4258             sk->state != TCP_CLOSING
4259         ) {
4260                 return;
4261         }
4262 
4263         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4264         if (buff == NULL) 
4265                 return;
4266 
4267         buff->len = sizeof(struct tcphdr);      /* TCP header only; grows by the lower header below */
4268         buff->free = 1;
4269         buff->sk = sk;
4270         buff->localroute = sk->localroute;
4271 
4272         t1 = (struct tcphdr *) buff->data;
4273 
4274         /* Put in the IP header and routing stuff. */
4275         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4276                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4277         if (tmp < 0) 
4278         {
4279                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4280                 return;
4281         }
4282 
4283         buff->len += tmp;                       /* a non-negative result is counted as bytes placed before the TCP header */
4284         t1 = (struct tcphdr *)((char *)t1 +tmp);
4285 
4286         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1)); /* start from the socket's dummy_th, then override fields below */
4287 
4288         /*
4289          * Use a previous sequence.
4290          * This should cause the other end to send an ack.
4291          */
4292         t1->seq = htonl(sk->sent_seq-1);
4293         t1->ack = 1; 
4294         t1->res1= 0;
4295         t1->res2= 0;
4296         t1->rst = 0;
4297         t1->urg = 0;
4298         t1->psh = 0;
4299         t1->fin = 0;
4300         t1->syn = 0;
4301         t1->ack_seq = htonl(sk->acked_seq);     /* host -> network order, same direction as seq above */
4302         t1->window = htons(tcp_select_window(sk));      /* host -> network order */
4303         t1->doff = sizeof(*t1)/4;               /* bare header size expressed in 4-byte units */
4304         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4305 
4306          /*     Send it and free it.
4307           *     This will prevent the timer from automatically being restarted.
4308           */
4309         sk->prot->queue_xmit(sk, dev, buff, 1);
4310         tcp_statistics.TcpOutSegs++;
4311 }
4312 
4313 void
4314 tcp_send_probe0(struct sock *sk)
     /*
      * Send one probe through tcp_write_wakeup() and back off the probe
      * timer.
      *
      * sk: socket to probe from.  If sk->zapped is set the function
      *     returns at once without touching any counter or timer.
      *
      * Otherwise sk->rto is doubled (never above 120*HZ), the TIME_PROBE0
      * timer is re-armed with the new value, and the backoff and
      * retransmit counters are incremented.  tcp_write_wakeup() returns
      * no status, so this bookkeeping happens even if it sent nothing.
      */
4315 {
4316         if (sk->zapped)
4317                 return;         /* After a valid reset we can send no more */
4318 
4319         tcp_write_wakeup(sk);
4320 
4321         sk->backoff++;
4322         sk->rto = min(sk->rto << 1, 120*HZ);    /* double the timeout, ceiling of 120*HZ */
4323         reset_timer (sk, TIME_PROBE0, sk->rto);
4324         sk->retransmits++;                      /* per-socket count */
4325         sk->prot->retransmits ++;               /* count kept in the protocol table */
4326 }
4327 
4328 /*
4329  *      Socket option code for TCP. 
4330  */
4331   
4332 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /*
      * Set a TCP-level socket option.
      *
      * sk:      socket being configured.
      * level:   anything other than SOL_TCP is passed on to ip_setsockopt().
      * optname: TCP_MAXSEG or TCP_NODELAY.
      * optval:  user-space pointer to an int holding the new value.
      * optlen:  only forwarded to ip_setsockopt().
      *          NOTE(review): the TCP path never checks it and always
      *          reads sizeof(int) bytes from optval.
      *
      * Returns 0 on success, -EINVAL for a NULL optval or a TCP_MAXSEG
      * value outside 1..MAX_WINDOW, the verify_area() error if optval is
      * not readable, and -ENOPROTOOPT for an unknown optname.  The value
      * is fetched before optname is examined, so a bad pointer is reported
      * even when the option name is also unknown.
      */
4333 {
4334         int val,err;
4335 
4336         if(level!=SOL_TCP)
4337                 return ip_setsockopt(sk,level,optname,optval,optlen);
4338 
4339         if (optval == NULL) 
4340                 return(-EINVAL);
4341 
4342         err=verify_area(VERIFY_READ, optval, sizeof(int));
4343         if(err)
4344                 return err;
4345         
4346         val = get_fs_long((unsigned long *)optval);     /* fetch the int from user space */
4347 
4348         switch(optname)
4349         {
4350                 case TCP_MAXSEG:
4351 /*
4352  * Values greater than the interface MTU won't take effect.  However, at
4353  * the point when this call is made we typically don't yet know
4354  * which interface is going to be used, so only the range is checked.
4355  */
4356                         if(val<1||val>MAX_WINDOW)
4357                                 return -EINVAL;
4358                         sk->user_mss=val;
4359                         return 0;
4360                 case TCP_NODELAY:
4361                         sk->nonagle=(val==0)?0:1;       /* any non-zero value is stored as 1 */
4362                         return 0;
4363                 default:
4364                         return(-ENOPROTOOPT);
4365         }
4366 }
4367 
4368 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /*
      * Read a TCP-level socket option.
      *
      * sk:      socket being queried.
      * level:   anything other than SOL_TCP is passed on to ip_getsockopt().
      * optname: TCP_MAXSEG (reports sk->user_mss) or TCP_NODELAY
      *          (reports sk->nonagle).
      * optval:  user-space buffer that receives the value as an int.
      * optlen:  user-space int that is set to sizeof(int).
      *
      * Returns 0 on success, -ENOPROTOOPT for an unknown optname, or the
      * verify_area() error if optlen or optval is not writable.
      *
      * NOTE(review): *optlen is written before optval is verified, so a
      * call that fails on optval has already modified *optlen.
      */
4369 {
4370         int val,err;
4371 
4372         if(level!=SOL_TCP)
4373                 return ip_getsockopt(sk,level,optname,optval,optlen);
4374                         
4375         switch(optname)
4376         {
4377                 case TCP_MAXSEG:
4378                         val=sk->user_mss;       /* the field tcp_setsockopt(TCP_MAXSEG) stores into */
4379                         break;
4380                 case TCP_NODELAY:
4381                         val=sk->nonagle;        /* Until Johannes stuff is in */
4382                         break;
4383                 default:
4384                         return(-ENOPROTOOPT);
4385         }
4386         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4387         if(err)
4388                 return err;
4389         put_fs_long(sizeof(int),(unsigned long *) optlen);      /* report the size of the value written below */
4390 
4391         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4392         if(err)
4393                 return err;
4394         put_fs_long(val,(unsigned long *)optval);
4395 
4396         return(0);
4397 }       
4398 
4399 
4400 struct proto tcp_prot = {
     /*
      * Protocol table for TCP.
      *
      * The initializer is positional: what each entry means is fixed by
      * the member order of struct proto, which is not visible in this part
      * of the file.  Keep the two in step when adding or moving entries.
      *
      * Presumably this is the table reached through sk->prot for TCP
      * sockets (this file calls sk->prot->wmalloc, ->wfree, ->build_header
      * and ->queue_xmit) -- confirm against struct proto.
      */
4401         sock_wmalloc,
4402         sock_rmalloc,
4403         sock_wfree,
4404         sock_rfree,
4405         sock_rspace,
4406         sock_wspace,
4407         tcp_close,
4408         tcp_read,
4409         tcp_write,
4410         tcp_sendto,
4411         tcp_recvfrom,
4412         ip_build_header,
4413         tcp_connect,
4414         tcp_accept,
4415         ip_queue_xmit,
4416         tcp_retransmit,
4417         tcp_write_wakeup,
4418         tcp_read_wakeup,
4419         tcp_rcv,
4420         tcp_select,
4421         tcp_ioctl,
4422         NULL,                   /* slot left empty for TCP; which member it is depends on struct proto */
4423         tcp_shutdown,
4424         tcp_setsockopt,
4425         tcp_getsockopt,
4426         128,                    /* NOTE(review): data member, meaning set by struct proto -- confirm */
4427         0,                      /* NOTE(review): data member; possibly the retransmits counter bumped via sk->prot->retransmits -- confirm */
4428         {NULL,},                /* presumably an array member: first element NULL, the rest zero-initialised */
4429         "TCP"                   /* presumably the protocol's name string */
4430 };

/* [previous][next][first][last][top][bottom][index][help] */