root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_dequeue_established
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. tcp_send_skb
  15. tcp_dequeue_partial
  16. tcp_send_partial
  17. tcp_enqueue_partial
  18. tcp_send_ack
  19. tcp_build_header
  20. tcp_write
  21. tcp_sendto
  22. tcp_read_wakeup
  23. cleanup_rbuf
  24. tcp_read_urg
  25. tcp_read
  26. tcp_shutdown
  27. tcp_recvfrom
  28. tcp_reset
  29. tcp_options
  30. default_mask
  31. tcp_conn_request
  32. tcp_close
  33. tcp_write_xmit
  34. tcp_ack
  35. tcp_data
  36. tcp_check_urg
  37. tcp_urg
  38. tcp_fin
  39. tcp_accept
  40. tcp_connect
  41. tcp_sequence
  42. tcp_clean_end
  43. tcp_rcv
  44. tcp_write_wakeup
  45. tcp_send_probe0
  46. tcp_setsockopt
  47. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
   27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
   50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
  83  *              Matt Dillon     :       Yet more small nasties remove from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *              Alan Cox        :       Reset on closedown bug.
  87  *      Peter De Schrijver      :       ENOTCONN check missing in tcp_sendto().
  88  *              Michael Pall    :       Handle select() after URG properly in all cases.
  89  *              Michael Pall    :       Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin).
  90  *              Michael Pall    :       Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now.
  91  *              Michael Pall    :       recv(...,MSG_OOB) never blocks in the BSD api.
  92  *              Alan Cox        :       Changed the semantics of sk->socket to 
  93  *                                      fix a race and a signal problem with
  94  *                                      accept() and async I/O.
  95  *              Alan Cox        :       Relaxed the rules on tcp_sendto().
  96  *
  97  *
  98  * To Fix:
  99  *                      Fast path the code. Two things here - fix the window calculation
 100  *              so it doesn't iterate over the queue, also spot packets with no funny
 101  *              options arriving in order and process directly.
 102  *
 103  *              This program is free software; you can redistribute it and/or
 104  *              modify it under the terms of the GNU General Public License
 105  *              as published by the Free Software Foundation; either version
 106  *              2 of the License, or(at your option) any later version.
 107  *
 108  * Description of States:
 109  *
 110  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 111  *
 112  *      TCP_SYN_RECV            received a connection request, sent ack,
 113  *                              waiting for final ack in three-way handshake.
 114  *
 115  *      TCP_ESTABLISHED         connection established
 116  *
 117  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 118  *                              transmission of remaining buffered data
 119  *
 120  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 121  *                              to shutdown
 122  *
 123  *      TCP_CLOSING             both sides have shutdown but we still have
 124  *                              data we have to finish sending
 125  *
 126  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 127  *                              closed, can only be entered from FIN_WAIT2
 128  *                              or CLOSING.  Required because the other end
 129  *                              may not have gotten our last ACK causing it
 130  *                              to retransmit the data packet (which we ignore)
 131  *
 132  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 133  *                              us to finish writing our data and to shutdown
 134  *                              (we have to close() to move on to LAST_ACK)
 135  *
  136  *      TCP_LAST_ACK            our side has shutdown after remote has
 137  *                              shutdown.  There may still be data in our
 138  *                              buffer that we have to finish sending
 139  *              
 140  *      TCP_CLOSE               socket is finished
 141  */
 142 #include <linux/types.h>
 143 #include <linux/sched.h>
 144 #include <linux/mm.h>
 145 #include <linux/string.h>
 146 #include <linux/socket.h>
 147 #include <linux/sockios.h>
 148 #include <linux/termios.h>
 149 #include <linux/in.h>
 150 #include <linux/fcntl.h>
 151 #include <linux/inet.h>
 152 #include <linux/netdevice.h>
 153 #include "snmp.h"
 154 #include "ip.h"
 155 #include "protocol.h"
 156 #include "icmp.h"
 157 #include "tcp.h"
 158 #include <linux/skbuff.h>
 159 #include "sock.h"
 160 #include "route.h"
 161 #include <linux/errno.h>
 162 #include <linux/timer.h>
 163 #include <asm/system.h>
 164 #include <asm/segment.h>
 165 #include <linux/mm.h>
 166 
      /*
       * TCP_FASTPATH is forced off here, so the fast-path hit/miss counters
       * guarded by it further down are not compiled in.
       */
  167 #undef TCP_FASTPATH
  168 
      /*
       * SEQ_TICK and seq_offset are not referenced by the code visible in
       * this part of the file; presumably they feed initial sequence number
       * selection -- TODO confirm against the connect/accept paths.
       */
  169 #define SEQ_TICK 3
  170 unsigned long seq_offset;
      /*
       * Protocol-wide TCP counters.  Code in this file updates fields such as
       * TcpCurrEstab, TcpAttemptFails and TcpOutSegs.
       */
  171 struct tcp_mib  tcp_statistics;
  172 
      /* Receive fast-path statistics; only present when TCP_FASTPATH is defined. */
  173 #ifdef TCP_FASTPATH
  174 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
  175 #endif
 176 
 177 
 178 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 179 {
 180         if (a < b) 
 181                 return(a);
 182         return(b);
 183 }
 184 
 185 #undef STATE_TRACE
 186 
 187 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 188 {
 189         if(sk->state==TCP_ESTABLISHED)
 190                 tcp_statistics.TcpCurrEstab--;
 191 #ifdef STATE_TRACE
 192         if(sk->debug)
 193                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 194 #endif  
 195         sk->state=state;
 196         if(state==TCP_ESTABLISHED)
 197                 tcp_statistics.TcpCurrEstab++;
 198 }
 199 
 200 /* This routine picks a TCP windows for a socket based on
 201    the following constraints
 202    
 203    1. The window can never be shrunk once it is offered (RFC 793)
 204    2. We limit memory per socket
 205    
 206    For now we use NET2E3's heuristic of offering half the memory
 207    we have handy. All is not as bad as this seems however because
 208    of two things. Firstly we will bin packets even within the window
 209    in order to get the data we are waiting for into the memory limit.
 210    Secondly we bin common duplicate forms at receive time
 211    
 212    Better heuristics welcome
 213 */
 214    
 215 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 216 {
 217         int new_window = sk->prot->rspace(sk);
 218         
 219         if(sk->window_clamp)
 220                 new_window=min(sk->window_clamp,new_window);
 221 /*
 222  * two things are going on here.  First, we don't ever offer a
 223  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 224  * receiver side of SWS as specified in RFC1122.
 225  * Second, we always give them at least the window they
 226  * had before, in order to avoid retracting window.  This
 227  * is technically allowed, but RFC1122 advises against it and
 228  * in practice it causes trouble.
 229  */
 230         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 231                 return(sk->window);
 232         return(new_window);
 233 }
 234 
 235 /*
 236  *      Find someone to 'accept'. Must be called with
 237  *      sk->inuse=1 or cli()
 238  */ 
 239 
 240 static struct sk_buff *tcp_find_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 241 {
 242         struct sk_buff *p=skb_peek(&s->receive_queue);
 243         if(p==NULL)
 244                 return NULL;
 245         do
 246         {
 247                 if(p->sk->state>=TCP_ESTABLISHED)
 248                         return p;
 249                 p=p->next;
 250         }
 251         while(p!=skb_peek(&s->receive_queue));
 252         return NULL;
 253 }
 254 
 255 static struct sk_buff *tcp_dequeue_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 256 {
 257         struct sk_buff *skb;
 258         unsigned long flags;
 259         save_flags(flags);
 260         cli(); 
 261         skb=tcp_find_established(s);
 262         if(skb!=NULL)
 263                 skb_unlink(skb);        /* Take it off the queue */
 264         restore_flags(flags);
 265         return skb;
 266 }
 267 
 268 
 269 /*
 270  *      Enter the time wait state. 
 271  */
 272 
 273 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 274 {
 275         tcp_set_state(sk,TCP_TIME_WAIT);
 276         sk->shutdown = SHUTDOWN_MASK;
 277         if (!sk->dead)
 278                 sk->state_change(sk);
 279         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 280 }
 281 
 282 /*
 283  *      A timer event has trigger a tcp retransmit timeout. The
 284  *      socket xmit queue is ready and set up to send. Because
 285  *      the ack receive code keeps the queue straight we do
 286  *      nothing clever here.
 287  */
 288 
 289 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 290 {
 291         if (all) 
 292         {
 293                 ip_retransmit(sk, all);
 294                 return;
 295         }
 296 
 297         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 298         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 299         sk->cong_count = 0;
 300 
 301         sk->cong_window = 1;
 302 
 303         /* Do the actual retransmit. */
 304         ip_retransmit(sk, all);
 305 }
 306 
 307 
 308 /*
 309  * This routine is called by the ICMP module when it gets some
 310  * sort of error condition.  If err < 0 then the socket should
 311  * be closed and the error returned to the user.  If err > 0
 312  * it's just the icmp type << 8 | icmp code.  After adjustment
 313  * header points to the first 8 bytes of the tcp header.  We need
 314  * to find the appropriate port.
 315  */
 316 
 317 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 318         unsigned long saddr, struct inet_protocol *protocol)
 319 {
 320         struct tcphdr *th;
 321         struct sock *sk;
 322         struct iphdr *iph=(struct iphdr *)header;
 323   
 324         header+=4*iph->ihl;
 325    
 326 
 327         th =(struct tcphdr *)header;
 328         sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
 329 
 330         if (sk == NULL) 
 331                 return;
 332   
 333         if(err<0)
 334         {
 335                 sk->err = -err;
 336                 sk->error_report(sk);
 337                 return;
 338         }
 339 
 340         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 341         {
 342                 /*
 343                  * FIXME:
 344                  * For now we will just trigger a linear backoff.
 345                  * The slow start code should cause a real backoff here.
 346                  */
 347                 if (sk->cong_window > 4)
 348                         sk->cong_window--;
 349                 return;
 350         }
 351 
 352 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 353 
 354         /*
 355          * If we've already connected we will keep trying
 356          * until we time out, or the user gives up.
 357          */
 358 
 359         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 360         {
 361                 if (sk->state == TCP_SYN_SENT) 
 362                 {
 363                         tcp_statistics.TcpAttemptFails++;
 364                         tcp_set_state(sk,TCP_CLOSE);
 365                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 366                 }
 367                 sk->err = icmp_err_convert[err & 0xff].errno;           
 368         }
 369         return;
 370 }
 371 
 372 
 373 /*
 374  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 375  *      in the received data queue (ie a frame missing that needs sending to us)
 376  */
 377 
 378 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 379 {
 380         unsigned long counted;
 381         unsigned long amount;
 382         struct sk_buff *skb;
 383         int sum;
 384         unsigned long flags;
 385 
 386         if(sk && sk->debug)
 387                 printk("tcp_readable: %p - ",sk);
 388 
 389         save_flags(flags);
 390         cli();
 391         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 392         {
 393                 restore_flags(flags);
 394                 if(sk && sk->debug) 
 395                         printk("empty\n");
 396                 return(0);
 397         }
 398   
 399         counted = sk->copied_seq+1;     /* Where we are at the moment */
 400         amount = 0;
 401   
 402         /* Do until a push or until we are out of data. */
 403         do 
 404         {
 405                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 406                         break;
 407                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 408                 if (skb->h.th->syn)
 409                         sum++;
 410                 if (sum > 0) 
 411                 {                                       /* Add it up, move on */
 412                         amount += sum;
 413                         if (skb->h.th->syn) 
 414                                 amount--;
 415                         counted += sum;
 416                 }
 417                 /*
 418                  * Don't count urg data ... but do it in the right place!
 419                  * Consider: "old_data (ptr is here) URG PUSH data"
 420                  * The old code would stop at the first push because
 421                  * it counted the urg (amount==1) and then does amount--
 422                  * *after* the loop.  This means tcp_readable() always
 423                  * returned zero if any URG PUSH was in the queue, even
 424                  * though there was normal data available. If we subtract
 425                  * the urg data right here, we even get it to work for more
 426                  * than one URG PUSH skb without normal data.
 427                  * This means that select() finally works now with urg data
 428                  * in the queue.  Note that rlogin was never affected
 429                  * because it doesn't use select(); it uses two processes
 430                  * and a blocking read().  And the queue scan in tcp_read()
 431                  * was correct.  Mike <pall@rz.uni-karlsruhe.de>
 432                  */
 433                 if (skb->h.th->urg)
 434                         amount--;       /* don't count urg data */
 435                 if (amount && skb->h.th->psh) break;
 436                 skb = skb->next;
 437         }
 438         while(skb != (struct sk_buff *)&sk->receive_queue);
 439 
 440         restore_flags(flags);
 441         if(sk->debug)
 442                 printk("got %lu bytes.\n",amount);
 443         return(amount);
 444 }
 445 
 446 
 447 /*
 448  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 449  *      listening socket has a receive queue of sockets to accept.
 450  */
 451 
 452 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 453 {
 454         sk->inuse = 1;
 455 
 456         switch(sel_type) 
 457         {
 458                 case SEL_IN:
 459                         select_wait(sk->sleep, wait);
 460                         if (skb_peek(&sk->receive_queue) != NULL) 
 461                         {
 462                                 if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) 
 463                                 {
 464                                         release_sock(sk);
 465                                         return(1);
 466                                 }
 467                         }
 468                         if (sk->err != 0)       /* Receiver error */
 469                         {
 470                                 release_sock(sk);
 471                                 return(1);
 472                         }
 473                         if (sk->shutdown & RCV_SHUTDOWN) 
 474                         {
 475                                 release_sock(sk);
 476                                 return(1);
 477                         } 
 478                         release_sock(sk);
 479                         return(0);
 480                 case SEL_OUT:
 481                         select_wait(sk->sleep, wait);
 482                         if (sk->shutdown & SEND_SHUTDOWN) 
 483                         {
 484                                 /* FIXME: should this return an error? */
 485                                 release_sock(sk);
 486                                 return(0);
 487                         }
 488 
 489                         /*
 490                          * This is now right thanks to a small fix
 491                          * by Matt Dillon.
 492                          */
 493                         
 494                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 495                         {
 496                                 release_sock(sk);
 497                                 /* This should cause connect to work ok. */
 498                                 if (sk->state == TCP_SYN_RECV ||
 499                                     sk->state == TCP_SYN_SENT) return(0);
 500                                 return(1);
 501                         }
 502                         release_sock(sk);
 503                         return(0);
 504                 case SEL_EX:
 505                         select_wait(sk->sleep,wait);
 506                         if (sk->err || sk->urg_data) 
 507                         {
 508                                 release_sock(sk);
 509                                 return(1);
 510                         }
 511                         release_sock(sk);
 512                         return(0);
 513         }
 514 
 515         release_sock(sk);
 516         return(0);
 517 }
 518 
 519 
 520 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 521 {
 522         int err;
 523         switch(cmd) 
 524         {
 525 
 526                 case TIOCINQ:
 527 #ifdef FIXME    /* FIXME: */
 528                 case FIONREAD:
 529 #endif
 530                 {
 531                         unsigned long amount;
 532 
 533                         if (sk->state == TCP_LISTEN) 
 534                                 return(-EINVAL);
 535 
 536                         sk->inuse = 1;
 537                         amount = tcp_readable(sk);
 538                         release_sock(sk);
 539                         err=verify_area(VERIFY_WRITE,(void *)arg,
 540                                                    sizeof(unsigned long));
 541                         if(err)
 542                                 return err;
 543                         put_fs_long(amount,(unsigned long *)arg);
 544                         return(0);
 545                 }
 546                 case SIOCATMARK:
 547                 {
 548                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 549 
 550                         err = verify_area(VERIFY_WRITE,(void *) arg,
 551                                                   sizeof(unsigned long));
 552                         if (err)
 553                                 return err;
 554                         put_fs_long(answ,(int *) arg);
 555                         return(0);
 556                 }
 557                 case TIOCOUTQ:
 558                 {
 559                         unsigned long amount;
 560 
 561                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 562                         amount = sk->prot->wspace(sk);
 563                         err=verify_area(VERIFY_WRITE,(void *)arg,
 564                                                    sizeof(unsigned long));
 565                         if(err)
 566                                 return err;
 567                         put_fs_long(amount,(unsigned long *)arg);
 568                         return(0);
 569                 }
 570                 default:
 571                         return(-EINVAL);
 572         }
 573 }
 574 
 575 
  576 /*
  577  *      This routine computes a TCP checksum. 
       *
       *      th      start of the data to sum (the TCP header)
       *      len     number of bytes to sum, starting at th
       *      saddr   source address; 0 means "use ip_my_addr()"
       *      daddr   destination address
       *
       *      The sum covers the two addresses, a word built from the
       *      length and IPPROTO_TCP, and then len bytes at th.  The
       *      return value is the one's complement of the 16-bit folded
       *      sum.
       *
       *      The implementation is i386 inline assembly.
       *      NOTE(review): the clobber lists name registers that are
       *      also input/output operands, and the asm text is a string
       *      literal containing raw newlines; newer compilers are
       *      expected to reject both -- confirm before rebuilding with
       *      a different toolchain.
  578  */
  579  
  580 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
  581           unsigned long saddr, unsigned long daddr)
  582 {     
  583         unsigned long sum;
  584    
              /* An unset source address is replaced by our own address. */
  585         if (saddr == 0) saddr = ip_my_addr();
  586 
  587 /*
  588  * stupid, gcc complains when I use just one __asm__ block,
  589  * something about too many reloads, but this is just two
  590  * instructions longer than what I want
  591  */
              /*
               * Part 1: sum = daddr + saddr + ((ntohs(len) << 16) + IPPROTO_TCP*256),
               * with the carry out of each add folded back in (adcl).
               */
  592         __asm__("
  593             addl %%ecx, %%ebx
  594             adcl %%edx, %%ebx
  595             adcl $0, %%ebx
  596             "
  597         : "=b"(sum)
  598         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
  599         : "bx", "cx", "dx" );
              /*
               * Part 2: add len bytes starting at th to the running sum.
               *   - label 1: blocks of 32 bytes (eight lodsl/adcl pairs per
               *     pass, len >> 5 passes); skipped when len < 32
               *   - label 3: the remaining whole 32-bit words ((len & 28) >> 2)
               *   - label 4: one trailing 16-bit word if bit 1 of len is set
               *   - label 5: one trailing byte if bit 0 of len is set
               *   - label 6: fold the upper 16 bits of the sum into the lower
               *     16 bits, adding the carry back in
               * edx keeps a copy of the original len for the tail tests.
               */
  600         __asm__("
  601             movl %%ecx, %%edx
  602             cld
  603             cmpl $32, %%ecx
  604             jb 2f
  605             shrl $5, %%ecx
  606             clc
  607 1:          lodsl
  608             adcl %%eax, %%ebx
  609             lodsl
  610             adcl %%eax, %%ebx
  611             lodsl
  612             adcl %%eax, %%ebx
  613             lodsl
  614             adcl %%eax, %%ebx
  615             lodsl
  616             adcl %%eax, %%ebx
  617             lodsl
  618             adcl %%eax, %%ebx
  619             lodsl
  620             adcl %%eax, %%ebx
  621             lodsl
  622             adcl %%eax, %%ebx
  623             loop 1b
  624             adcl $0, %%ebx
  625             movl %%edx, %%ecx
  626 2:          andl $28, %%ecx
  627             je 4f
  628             shrl $2, %%ecx
  629             clc
  630 3:          lodsl
  631             adcl %%eax, %%ebx
  632             loop 3b
  633             adcl $0, %%ebx
  634 4:          movl $0, %%eax
  635             testw $2, %%dx
  636             je 5f
  637             lodsw
  638             addl %%eax, %%ebx
  639             adcl $0, %%ebx
  640             movw $0, %%ax
  641 5:          test $1, %%edx
  642             je 6f
  643             lodsb
  644             addl %%eax, %%ebx
  645             adcl $0, %%ebx
  646 6:          movl %%ebx, %%eax
  647             shrl $16, %%eax
  648             addw %%ax, %%bx
  649             adcw $0, %%bx
  650             "
  651         : "=b"(sum)
  652         : "0"(sum), "c"(len), "S"(th)
  653         : "ax", "bx", "cx", "dx", "si" );
  654 
  655         /* We only want the bottom 16 bits, but we never cleared the top 16. */
  656   
              /* Complement the sum and mask away the stale upper half. */
  657         return((~sum) & 0xffff);
  658 }
 659 
 660 
 661 
 662 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 663                 unsigned long daddr, int len, struct sock *sk)
 664 {
 665         th->check = 0;
 666         th->check = tcp_check(th, len, saddr, daddr);
 667         return;
 668 }
 669 
 670 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 671 {
 672         int size;
 673         struct tcphdr * th = skb->h.th;
 674 
 675         /* length of packet (not counting length of pre-tcp headers) */
 676         size = skb->len - ((unsigned char *) th - skb->data);
 677 
 678         /* sanity check it.. */
 679         if (size < sizeof(struct tcphdr) || size > skb->len) 
 680         {
 681                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 682                         skb, skb->data, th, skb->len);
 683                 kfree_skb(skb, FREE_WRITE);
 684                 return;
 685         }
 686 
 687         /* If we have queued a header size packet.. */
 688         if (size == sizeof(struct tcphdr)) 
 689         {
 690                 /* If its got a syn or fin its notionally included in the size..*/
 691                 if(!th->syn && !th->fin) 
 692                 {
 693                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 694                         kfree_skb(skb,FREE_WRITE);
 695                         return;
 696                 }
 697         }
 698 
 699         tcp_statistics.TcpOutSegs++;  
 700 
 701         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 702         if (after(skb->h.seq, sk->window_seq) ||
 703             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 704              sk->packets_out >= sk->cong_window) 
 705         {
 706                 /* checksum will be supplied by tcp_write_xmit.  So
 707                  * we shouldn't need to set it at all.  I'm being paranoid */
 708                 th->check = 0;
 709                 if (skb->next != NULL) 
 710                 {
 711                         printk("tcp_send_partial: next != NULL\n");
 712                         skb_unlink(skb);
 713                 }
 714                 skb_queue_tail(&sk->write_queue, skb);
 715                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 716                     sk->send_head == NULL &&
 717                     sk->ack_backlog == 0)
 718                         reset_timer(sk, TIME_PROBE0, sk->rto);
 719         } 
 720         else 
 721         {
 722                 th->ack_seq = ntohl(sk->acked_seq);
 723                 th->window = ntohs(tcp_select_window(sk));
 724 
 725                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 726 
 727                 sk->sent_seq = sk->write_seq;
 728                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 729         }
 730 }
 731 
 732 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 733 {
 734         struct sk_buff * skb;
 735         unsigned long flags;
 736 
 737         save_flags(flags);
 738         cli();
 739         skb = sk->partial;
 740         if (skb) {
 741                 sk->partial = NULL;
 742                 del_timer(&sk->partial_timer);
 743         }
 744         restore_flags(flags);
 745         return skb;
 746 }
 747 
 748 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 749 {
 750         struct sk_buff *skb;
 751 
 752         if (sk == NULL)
 753                 return;
 754         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 755                 tcp_send_skb(sk, skb);
 756 }
 757 
 758 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 759 {
 760         struct sk_buff * tmp;
 761         unsigned long flags;
 762 
 763         save_flags(flags);
 764         cli();
 765         tmp = sk->partial;
 766         if (tmp)
 767                 del_timer(&sk->partial_timer);
 768         sk->partial = skb;
 769         init_timer(&sk->partial_timer);
 770         sk->partial_timer.expires = HZ;
 771         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 772         sk->partial_timer.data = (unsigned long) sk;
 773         add_timer(&sk->partial_timer);
 774         restore_flags(flags);
 775         if (tmp)
 776                 tcp_send_skb(sk, tmp);
 777 }
 778 
 779 
 780 /*
 781  *      This routine sends an ack and also updates the window. 
 782  */
 783  
 784 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 785              struct sock *sk,
 786              struct tcphdr *th, unsigned long daddr)
 787 {
 788         struct sk_buff *buff;
 789         struct tcphdr *t1;
 790         struct device *dev = NULL;
 791         int tmp;
 792 
 793         if(sk->zapped)
 794                 return;         /* We have been reset, we may not send again */
 795         /*
 796          * We need to grab some memory, and put together an ack,
 797          * and then put it into the queue to be sent.
 798          */
 799 
 800         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 801         if (buff == NULL) 
 802         {
 803                 /* Force it to send an ack. */
 804                 sk->ack_backlog++;
 805                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 806                 {
 807                         reset_timer(sk, TIME_WRITE, 10);
 808                 }
 809                 return;
 810         }
 811 
 812         buff->len = sizeof(struct tcphdr);
 813         buff->sk = sk;
 814         buff->localroute = sk->localroute;
 815         t1 =(struct tcphdr *) buff->data;
 816 
 817         /* Put in the IP header and routing stuff. */
 818         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 819                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 820         if (tmp < 0) 
 821         {
 822                 buff->free=1;
 823                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 824                 return;
 825         }
 826         buff->len += tmp;
 827         t1 =(struct tcphdr *)((char *)t1 +tmp);
 828 
 829         /* FIXME: */
 830         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 831 
 832         /*
 833          *      Swap the send and the receive. 
 834          */
 835          
 836         t1->dest = th->source;
 837         t1->source = th->dest;
 838         t1->seq = ntohl(sequence);
 839         t1->ack = 1;
 840         sk->window = tcp_select_window(sk);
 841         t1->window = ntohs(sk->window);
 842         t1->res1 = 0;
 843         t1->res2 = 0;
 844         t1->rst = 0;
 845         t1->urg = 0;
 846         t1->syn = 0;
 847         t1->psh = 0;
 848         t1->fin = 0;
 849         if (ack == sk->acked_seq) 
 850         {
 851                 sk->ack_backlog = 0;
 852                 sk->bytes_rcv = 0;
 853                 sk->ack_timed = 0;
 854                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 855                                   && sk->timeout == TIME_WRITE) 
 856                 {
 857                         if(sk->keepopen) {
 858                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 859                         } else {
 860                                 delete_timer(sk);
 861                         }
 862                 }
 863         }
 864         t1->ack_seq = ntohl(ack);
 865         t1->doff = sizeof(*t1)/4;
 866         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 867         if (sk->debug)
 868                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 869         tcp_statistics.TcpOutSegs++;
 870         sk->prot->queue_xmit(sk, dev, buff, 1);
 871 }
 872 
 873 
 874 /* 
 875  *      This routine builds a generic TCP header. 
 876  */
 877  
 878 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 879 {
 880 
 881         /* FIXME: want to get rid of this. */
 882         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 883         th->seq = htonl(sk->write_seq);
 884         th->psh =(push == 0) ? 1 : 0;
 885         th->doff = sizeof(*th)/4;
 886         th->ack = 1;
 887         th->fin = 0;
 888         sk->ack_backlog = 0;
 889         sk->bytes_rcv = 0;
 890         sk->ack_timed = 0;
 891         th->ack_seq = htonl(sk->acked_seq);
 892         sk->window = tcp_select_window(sk);
 893         th->window = htons(sk->window);
 894 
 895         return(sizeof(*th));
 896 }
 897 
 898 /*
 899  *      This routine copies from a user buffer into a socket,
 900  *      and starts the transmit system.
      *
      *      sk       - socket to write to; marked in use (sk->inuse = 1)
      *                 on entry, release_sock() is called before every return
      *      from     - user buffer holding the data (read with memcpy_fromfs)
      *      len      - number of bytes the caller wants sent
      *      nonblock - when non-zero, return -EAGAIN instead of sleeping
      *      flags    - MSG_OOB marks the data urgent; MSG_DONTROUTE is
      *                 or'ed into the buffer's localroute field
      *
      *      Returns the number of bytes copied if anything was copied
      *      before the loop stopped.  When nothing was copied the result
      *      is a negative error: -sk->err, -EPIPE, -EAGAIN, -ERESTARTSYS,
      *      or the negative value returned by a header builder.
 901  */
 902 
 903 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 904           int len, int nonblock, unsigned flags)
 905 {
 906         int copied = 0;
 907         int copy;
 908         int tmp;
 909         struct sk_buff *skb;
 910         struct sk_buff *send_tmp;
 911         unsigned char *buff;
 912         struct proto *prot;
 913         struct device *dev = NULL;
 914 
 915         sk->inuse=1;
 916         prot = sk->prot;
 917         while(len > 0) 
 918         {
 919                 if (sk->err) 
 920                 {                       /* Stop on an error */
 921                         release_sock(sk);
 922                         if (copied) 
 923                                 return(copied);
 924                         tmp = -sk->err;
 925                         sk->err = 0;
 926                         return(tmp);
 927                 }
 928 
 929         /*
 930          *      Refuse to send once the sending side has been shut down.
 931          */
 932         
 933                 if (sk->shutdown & SEND_SHUTDOWN) 
 934                 {
 935                         release_sock(sk);
 936                         sk->err = EPIPE;
 937                         if (copied) 
 938                                 return(copied);
 939                         sk->err = 0;
 940                         return(-EPIPE);
 941                 }
 942 
 943 
 944         /* 
 945          *      Wait for a connection to finish.
 946          */
 947         
 948                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 949                 {
 950                         if (sk->err) 
 951                         {
 952                                 release_sock(sk);
 953                                 if (copied) 
 954                                         return(copied);
 955                                 tmp = -sk->err;
 956                                 sk->err = 0;
 957                                 return(tmp);
 958                         }
 959 
                             /* Any state other than SYN_SENT/SYN_RECV cannot become writable. */
 960                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 961                         {
 962                                 release_sock(sk);
 963                                 if (copied) 
 964                                         return(copied);
 965 
 966                                 if (sk->err) 
 967                                 {
 968                                         tmp = -sk->err;
 969                                         sk->err = 0;
 970                                         return(tmp);
 971                                 }
 972 
                                     /* SIGPIPE is only raised when sk->keepopen is set. */
 973                                 if (sk->keepopen) 
 974                                 {
 975                                         send_sig(SIGPIPE, current, 0);
 976                                 }
 977                                 return(-EPIPE);
 978                         }
 979 
 980                         if (nonblock || copied) 
 981                         {
 982                                 release_sock(sk);
 983                                 if (copied) 
 984                                         return(copied);
 985                                 return(-EAGAIN);
 986                         }
 987 
                             /*
                              * Release the socket, then re-test the state with
                              * interrupts off before going to sleep on sk->sleep.
                              */
 988                         release_sock(sk);
 989                         cli();
 990                 
 991                         if (sk->state != TCP_ESTABLISHED &&
 992                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 993                         {
 994                                 interruptible_sleep_on(sk->sleep);
 995                                 if (current->signal & ~current->blocked) 
 996                                 {
 997                                         sti();
 998                                         if (copied) 
 999                                                 return(copied);
1000                                         return(-ERESTARTSYS);
1001                                 }
1002                         }
1003                         sk->inuse = 1;
1004                         sti();
1005                 }
1006 
1007         /*
1008          * The following code can result in copy <= 0 if sk->mss is ever
1009          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1010          * sk->mtu is constant once SYN processing is finished.  I.e. we
1011          * had better not get here until we've seen his SYN and at least one
1012          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1013          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1014          * non-decreasing.  Note that any ioctl to set user_mss must be done
1015          * before the exchange of SYN's.  If the initial ack from the other
1016          * end has a window of 0, max_window and thus mss will both be 0.
1017          */
1018 
1019         /* 
1020          *      Now we need to check if we have a half built packet. 
1021          */
1022 
1023                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1024                 {
1025                         int hdrlen;
1026 
1027                          /* IP header + TCP header */
1028                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1029                                  + sizeof(struct tcphdr);
1030         
1031                         /* Add more stuff to the end of skb->len */
1032                         if (!(flags & MSG_OOB)) 
1033                         {
                                     /* Room left up to one mss of data, capped by what the caller has. */
1034                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1035                                 /* FIXME: this is really a bug. */
1036                                 if (copy <= 0) 
1037                                 {
1038                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1039                                         copy = 0;
1040                                 }
1041           
1042                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1043                                 skb->len += copy;
1044                                 from += copy;
1045                                 copied += copy;
1046                                 len -= copy;
1047                                 sk->write_seq += copy;
1048                         }
                             /*
                              * Send now if the packet is full, the data is urgent, or
                              * nothing is outstanding; otherwise keep it as the partial.
                              */
1049                         if ((skb->len - hdrlen) >= sk->mss ||
1050                                 (flags & MSG_OOB) || !sk->packets_out)
1051                                 tcp_send_skb(sk, skb);
1052                         else
1053                                 tcp_enqueue_partial(skb, sk);
1054                         continue;
1055                 }
1056 
1057         /*
1058          * We also need to worry about the window.
1059          * If window < 1/2 the maximum window we've seen from this
1060          *   host, don't use it.  This is sender side
1061          *   silly window prevention, as specified in RFC1122.
1062          *   (Note that this is different than earlier versions of
1063          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1064          *   use the whole MSS.  Since this results in the right
1065          *   edge of the packet being outside the window, it will
1066          *   be queued for later rather than sent.
1067          */
1068 
1069                 copy = sk->window_seq - sk->write_seq;
1070                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1071                         copy = sk->mss;
1072                 if (copy > len)
1073                         copy = len;
1074 
1075         /*
1076          *      We should really check the window here also. 
1077          */
1078          
                     /* send_tmp stays NULL unless this packet may be held back as a partial. */
1079                 send_tmp = NULL;
1080                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1081                 {
1082                         /*
1083                          *      We will release the socket in case we sleep here. 
1084                          */
1085                         release_sock(sk);
1086                         /*
1087                          *      NB: following must be mtu, because mss can be increased.
1088                          *      mss is always <= mtu 
1089                          */
1090                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1091                         sk->inuse = 1;
1092                         send_tmp = skb;
1093                 } 
1094                 else 
1095                 {
1096                         /*
1097                          *      We will release the socket in case we sleep here. 
1098                          */
1099                         release_sock(sk);
1100                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1101                         sk->inuse = 1;
1102                 }
1103 
1104                 /*
1105                  *      If we didn't get any memory, we need to sleep. 
1106                  */
1107 
1108                 if (skb == NULL) 
1109                 {
1110                         if (nonblock) 
1111                         {
1112                                 release_sock(sk);
1113                                 if (copied) 
1114                                         return(copied);
1115                                 return(-EAGAIN);
1116                         }
1117 
1118                         /*
1119                          *      FIXME: here is another race condition. 
1120                          */
1121 
1122                         tmp = sk->wmem_alloc;
1123                         release_sock(sk);
1124                         cli();
1125                         /*
1126                          *      Again we will try to avoid it. 
                              *      Only sleep if wmem_alloc has not dropped since
                              *      it was sampled above.
1127                          */
1128                         if (tmp <= sk->wmem_alloc &&
1129                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1130                                 && sk->err == 0) 
1131                         {
1132                                 interruptible_sleep_on(sk->sleep);
1133                                 if (current->signal & ~current->blocked) 
1134                                 {
1135                                         sti();
1136                                         if (copied) 
1137                                                 return(copied);
1138                                         return(-ERESTARTSYS);
1139                                 }
1140                         }
1141                         sk->inuse = 1;
1142                         sti();
1143                         continue;
1144                 }
1145 
1146                 skb->len = 0;
1147                 skb->sk = sk;
1148                 skb->free = 0;
1149                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1150         
1151                 buff = skb->data;
1152         
1153                 /*
1154                  * FIXME: we need to optimize this.
1155                  * Perhaps some hints here would be good.
1156                  */
1157                 
1158                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1159                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1160                 if (tmp < 0 ) 
1161                 {
1162                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1163                         release_sock(sk);
1164                         if (copied) 
1165                                 return(copied);
1166                         return(tmp);
1167                 }
1168                 skb->len += tmp;
1169                 skb->dev = dev;
1170                 buff += tmp;
1171                 skb->h.th =(struct tcphdr *) buff;
                     /*
                      * The last argument is the data that will remain after this
                      * packet; tcp_build_header sets PSH when it is 0.
                      */
1172                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1173                 if (tmp < 0) 
1174                 {
1175                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1176                         release_sock(sk);
1177                         if (copied) 
1178                                 return(copied);
1179                         return(tmp);
1180                 }
1181 
1182                 if (flags & MSG_OOB) 
1183                 {
1184                         ((struct tcphdr *)buff)->urg = 1;
                             /* NOTE(review): host-to-network conversion; htons() would state the intent. */
1185                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1186                 }
1187                 skb->len += tmp;
1188                 memcpy_fromfs(buff+tmp, from, copy);
1189 
1190                 from += copy;
1191                 copied += copy;
1192                 len -= copy;
1193                 skb->len += copy;
1194                 skb->free = 0;
1195                 sk->write_seq += copy;
1196         
                     /* A short packet is held back as the partial while packets are outstanding. */
1197                 if (send_tmp != NULL && sk->packets_out) 
1198                 {
1199                         tcp_enqueue_partial(send_tmp, sk);
1200                         continue;
1201                 }
1202                 tcp_send_skb(sk, skb);
1203         }
1204         sk->err = 0;
1205 
1206 /*
1207  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1208  *      interactive fast network servers. It's meant to be on and
1209  *      it really improves the throughput though not the echo time
1210  *      on my slow slip link - Alan
1211  */
1212 
1213 /*
1214  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1215  */
1216  
1217         if(sk->partial && ((!sk->packets_out) 
1218      /* If not nagling we can send on the before case too.. */
1219               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1220         ))
1221                 tcp_send_partial(sk);
1222 
1223         release_sock(sk);
1224         return(copied);
1225 }
1226 
1227 
1228 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1229            int len, int nonblock, unsigned flags,
1230            struct sockaddr_in *addr, int addr_len)
1231 {
1232         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1233                 return -EINVAL;
1234         if (sk->state == TCP_CLOSE)
1235                 return -ENOTCONN;
1236         if (addr_len < sizeof(*addr))
1237                 return -EINVAL;
1238         if (addr->sin_family && addr->sin_family != AF_INET) 
1239                 return -EINVAL;
1240         if (addr->sin_port != sk->dummy_th.dest) 
1241                 return -EISCONN;
1242         if (addr->sin_addr.s_addr != sk->daddr) 
1243                 return -EISCONN;
1244         return tcp_write(sk, from, len, nonblock, flags);
1245 }
1246 
1247 
1248 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1249 {
1250         int tmp;
1251         struct device *dev = NULL;
1252         struct tcphdr *t1;
1253         struct sk_buff *buff;
1254 
1255         if (!sk->ack_backlog) 
1256                 return;
1257 
1258         /*
1259          * FIXME: we need to put code here to prevent this routine from
1260          * being called.  Being called once in a while is ok, so only check
1261          * if this is the second time in a row.
1262          */
1263 
1264         /*
1265          * We need to grab some memory, and put together an ack,
1266          * and then put it into the queue to be sent.
1267          */
1268 
1269         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1270         if (buff == NULL) 
1271         {
1272                 /* Try again real soon. */
1273                 reset_timer(sk, TIME_WRITE, 10);
1274                 return;
1275         }
1276 
1277         buff->len = sizeof(struct tcphdr);
1278         buff->sk = sk;
1279         buff->localroute = sk->localroute;
1280         
1281         /*
1282          *      Put in the IP header and routing stuff. 
1283          */
1284 
1285         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1286                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1287         if (tmp < 0) 
1288         {
1289                 buff->free=1;
1290                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1291                 return;
1292         }
1293 
1294         buff->len += tmp;
1295         t1 =(struct tcphdr *)(buff->data +tmp);
1296 
1297         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1298         t1->seq = htonl(sk->sent_seq);
1299         t1->ack = 1;
1300         t1->res1 = 0;
1301         t1->res2 = 0;
1302         t1->rst = 0;
1303         t1->urg = 0;
1304         t1->syn = 0;
1305         t1->psh = 0;
1306         sk->ack_backlog = 0;
1307         sk->bytes_rcv = 0;
1308         sk->window = tcp_select_window(sk);
1309         t1->window = ntohs(sk->window);
1310         t1->ack_seq = ntohl(sk->acked_seq);
1311         t1->doff = sizeof(*t1)/4;
1312         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1313         sk->prot->queue_xmit(sk, dev, buff, 1);
1314         tcp_statistics.TcpOutSegs++;
1315 }
1316 
1317 
1318 /*
1319  *      FIXME:
1320  *      This routine frees used buffers.
1321  *      It should consider sending an ACK to let the
1322  *      other end know we now have a bigger window.
1323  */
1324 
1325 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1326 {
1327         unsigned long flags;
1328         unsigned long left;
1329         struct sk_buff *skb;
1330         unsigned long rspace;
1331 
1332         if(sk->debug)
1333                 printk("cleaning rbuf for sk=%p\n", sk);
1334   
1335         save_flags(flags);
1336         cli();
1337   
1338         left = sk->prot->rspace(sk);
1339  
1340         /*
1341          * We have to loop through all the buffer headers,
1342          * and try to free up all the space we can.
1343          */
1344 
1345         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1346         {
1347                 if (!skb->used) 
1348                         break;
1349                 skb_unlink(skb);
1350                 skb->sk = sk;
1351                 kfree_skb(skb, FREE_READ);
1352         }
1353 
1354         restore_flags(flags);
1355 
1356         /*
1357          * FIXME:
1358          * At this point we should send an ack if the difference
1359          * in the window, and the amount of space is bigger than
1360          * TCP_WINDOW_DIFF.
1361          */
1362 
1363         if(sk->debug)
1364                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1365                                             left);
1366         if ((rspace=sk->prot->rspace(sk)) != left) 
1367         {
1368                 /*
1369                  * This area has caused the most trouble.  The current strategy
1370                  * is to simply do nothing if the other end has room to send at
1371                  * least 3 full packets, because the ack from those will auto-
1372                  * matically update the window.  If the other end doesn't think
1373                  * we have much space left, but we have room for at least 1 more
1374                  * complete packet than it thinks we do, we will send an ack
1375                  * immediately.  Otherwise we will wait up to .5 seconds in case
1376                  * the user reads some more.
1377                  */
1378                 sk->ack_backlog++;
1379         /*
1380          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1381          * if the other end is offering a window smaller than the agreed on MSS
1382          * (called sk->mtu here).  In theory there's no connection between send
1383          * and receive, and so no reason to think that they're going to send
1384          * small packets.  For the moment I'm using the hack of reducing the mss
1385          * only on the send side, so I'm putting mtu here.
1386          */
1387 
1388                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1389                 {
1390                         /* Send an ack right now. */
1391                         tcp_read_wakeup(sk);
1392                 } 
1393                 else 
1394                 {
1395                         /* Force it to send an ack soon. */
1396                         int was_active = del_timer(&sk->timer);
1397                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1398                         {
1399                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1400                         } 
1401                         else
1402                                 add_timer(&sk->timer);
1403                 }
1404         }
1405 } 
1406 
1407 
1408 /*
1409  *      Handle reading urgent data. 
1410  */
1411  
1412 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1413              unsigned char *to, int len, unsigned flags)
1414 {
1415 #ifdef NOTDEF
1416         struct wait_queue wait = { current, NULL };
1417 #endif
1418 
1419         while (len > 0) 
1420         {
1421                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1422                         return -EINVAL;
1423                 sk->inuse = 1;
1424                 if (sk->urg_data & URG_VALID) 
1425                 {
1426                         char c = sk->urg_data;
1427                         if (!(flags & MSG_PEEK))
1428                                 sk->urg_data = URG_READ;
1429                         put_fs_byte(c, to);
1430                         release_sock(sk);
1431                         return 1;
1432                 }
1433 
1434                 release_sock(sk);
1435                 
1436                 if (sk->err) 
1437                 {
1438                         int tmp = -sk->err;
1439                         sk->err = 0;
1440                         return tmp;
1441                 }
1442 
1443                 if (sk->state == TCP_CLOSE || sk->done) 
1444                 {
1445                         if (!sk->done) {
1446                                 sk->done = 1;
1447                                 return 0;
1448                         }
1449                         return -ENOTCONN;
1450                 }
1451 
1452                 if (sk->shutdown & RCV_SHUTDOWN) 
1453                 {
1454                         sk->done = 1;
1455                         return 0;
1456                 }
1457 
1458                 /*
1459                  * Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
1460                  * the available implementations agree in this case:
1461                  * this call should never block, independent of the
1462                  * blocking state of the socket.
1463                  * Mike <pall@rz.uni-karlsruhe.de>
1464                  */
1465                 return -EAGAIN;
1466 #ifdef NOTDEF
1467                 /* remove the loop, if this dead code gets removed! */
1468                 if (nonblock)
1469                         return -EAGAIN;
1470 
1471                 if (current->signal & ~current->blocked)
1472                         return -ERESTARTSYS;
1473 
1474                 current->state = TASK_INTERRUPTIBLE;
1475                 add_wait_queue(sk->sleep, &wait);
1476                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1477                     !(sk->shutdown & RCV_SHUTDOWN))
1478                         schedule();
1479                 remove_wait_queue(sk->sleep, &wait);
1480                 current->state = TASK_RUNNING;
1481 #endif
1482         }
1483         return 0;
1484 }
1485 
1486 
1487 /*
1488  *      This routine copies from a sock struct into the user buffer. 
1489  */
1490  
1491 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1492         int len, int nonblock, unsigned flags)
1493 {
1494         struct wait_queue wait = { current, NULL };
1495         int copied = 0;
1496         unsigned long peek_seq;
1497         unsigned long *seq;
1498         unsigned long used;
1499 
1500         /* This error should be checked. */
1501         if (sk->state == TCP_LISTEN)
1502                 return -ENOTCONN;
1503 
1504         /* Urgent data needs to be handled specially. */
1505         if (flags & MSG_OOB)
1506                 return tcp_read_urg(sk, nonblock, to, len, flags);
1507 
1508         peek_seq = sk->copied_seq;
1509         seq = &sk->copied_seq;
1510         if (flags & MSG_PEEK)
1511                 seq = &peek_seq;
1512 
1513         add_wait_queue(sk->sleep, &wait);
1514         sk->inuse = 1;
1515         while (len > 0) 
1516         {
1517                 struct sk_buff * skb;
1518                 unsigned long offset;
1519         
1520                 /*
1521                  * are we at urgent data? Stop if we have read anything.
1522                  */
1523                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1524                         break;
1525 
1526                 current->state = TASK_INTERRUPTIBLE;
1527 
1528                 skb = skb_peek(&sk->receive_queue);
1529                 do 
1530                 {
1531                         if (!skb)
1532                                 break;
1533                         if (before(1+*seq, skb->h.th->seq))
1534                                 break;
1535                         offset = 1 + *seq - skb->h.th->seq;
1536                         if (skb->h.th->syn)
1537                                 offset--;
1538                         if (offset < skb->len)
1539                                 goto found_ok_skb;
1540                         if (!(flags & MSG_PEEK))
1541                                 skb->used = 1;
1542                         skb = skb->next;
1543                 }
1544                 while (skb != (struct sk_buff *)&sk->receive_queue);
1545 
1546                 if (copied)
1547                         break;
1548 
1549                 if (sk->err) 
1550                 {
1551                         copied = -sk->err;
1552                         sk->err = 0;
1553                         break;
1554                 }
1555 
1556                 if (sk->state == TCP_CLOSE) 
1557                 {
1558                         if (!sk->done) 
1559                         {
1560                                 sk->done = 1;
1561                                 break;
1562                         }
1563                         copied = -ENOTCONN;
1564                         break;
1565                 }
1566 
1567                 if (sk->shutdown & RCV_SHUTDOWN) 
1568                 {
1569                         sk->done = 1;
1570                         break;
1571                 }
1572                         
1573                 if (nonblock) 
1574                 {
1575                         copied = -EAGAIN;
1576                         break;
1577                 }
1578 
1579                 cleanup_rbuf(sk);
1580                 release_sock(sk);
1581                 schedule();
1582                 sk->inuse = 1;
1583 
1584                 if (current->signal & ~current->blocked) 
1585                 {
1586                         copied = -ERESTARTSYS;
1587                         break;
1588                 }
1589                 continue;
1590 
1591         found_ok_skb:
1592                 /* Ok so how much can we use ? */
1593                 used = skb->len - offset;
1594                 if (len < used)
1595                         used = len;
1596                 /* do we have urgent data here? */
1597                 if (sk->urg_data) 
1598                 {
1599                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1600                         if (urg_offset < used) 
1601                         {
1602                                 if (!urg_offset) 
1603                                 {
1604                                         if (!sk->urginline) 
1605                                         {
1606                                                 ++*seq;
1607                                                 offset++;
1608                                                 used--;
1609                                         }
1610                                 }
1611                                 else
1612                                         used = urg_offset;
1613                         }
1614                 }
1615                 /* Copy it */
1616                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1617                         skb->h.th->doff*4 + offset, used);
1618                 copied += used;
1619                 len -= used;
1620                 to += used;
1621                 *seq += used;
1622                 if (after(sk->copied_seq+1,sk->urg_seq))
1623                         sk->urg_data = 0;
1624                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1625                         skb->used = 1;
1626         }
1627         remove_wait_queue(sk->sleep, &wait);
1628         current->state = TASK_RUNNING;
1629 
1630         /* Clean up data we have read: This will do ACK frames */
1631         cleanup_rbuf(sk);
1632         release_sock(sk);
1633         return copied;
1634 }
1635 
1636  
1637 /*
1638  *      Shutdown the sending side of a connection.
1639  */
1640 
1641 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1642 {
1643         struct sk_buff *buff;
1644         struct tcphdr *t1, *th;
1645         struct proto *prot;
1646         int tmp;
1647         struct device *dev = NULL;
1648 
1649         /*
1650          * We need to grab some memory, and put together a FIN,
1651          * and then put it into the queue to be sent.
1652          * FIXME:
1653          *
1654          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1655          *      Most of this is guesswork, so maybe it will work...
1656          */
1657 
1658         if (!(how & SEND_SHUTDOWN)) 
1659                 return;
1660          
1661         /*
1662          *      If we've already sent a FIN, return. 
1663          */
1664          
1665         if (sk->state == TCP_FIN_WAIT1 ||
1666             sk->state == TCP_FIN_WAIT2 ||
1667             sk->state == TCP_CLOSING ||
1668             sk->state == TCP_LAST_ACK ||
1669             sk->state == TCP_TIME_WAIT
1670         ) 
1671         {
1672                 return;
1673         }
1674         sk->inuse = 1;
1675 
1676         /*
1677          * flag that the sender has shutdown
1678          */
1679 
1680         sk->shutdown |= SEND_SHUTDOWN;
1681 
1682         /*
1683          *  Clear out any half completed packets. 
1684          */
1685 
1686         if (sk->partial)
1687                 tcp_send_partial(sk);
1688 
1689         prot =(struct proto *)sk->prot;
1690         th =(struct tcphdr *)&sk->dummy_th;
1691         release_sock(sk); /* incase the malloc sleeps. */
1692         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1693         if (buff == NULL)
1694                 return;
1695         sk->inuse = 1;
1696 
1697         buff->sk = sk;
1698         buff->len = sizeof(*t1);
1699         buff->localroute = sk->localroute;
1700         t1 =(struct tcphdr *) buff->data;
1701 
1702         /*
1703          *      Put in the IP header and routing stuff. 
1704          */
1705 
1706         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1707                            IPPROTO_TCP, sk->opt,
1708                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1709         if (tmp < 0) 
1710         {
1711                 /*
1712                  *      Finish anyway, treat this as a send that got lost. 
1713                  *
1714                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1715                  *      written data to be completely acknowledged along
1716                  *      with an acknowledge to our FIN.
1717                  *
1718                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1719                  *      connection established.
1720                  */
1721                 buff->free=1;
1722                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1723 
1724                 if (sk->state == TCP_ESTABLISHED)
1725                         tcp_set_state(sk,TCP_FIN_WAIT1);
1726                 else if(sk->state == TCP_CLOSE_WAIT)
1727                         tcp_set_state(sk,TCP_LAST_ACK);
1728                 else
1729                         tcp_set_state(sk,TCP_FIN_WAIT2);
1730 
1731                 release_sock(sk);
1732                 return;
1733         }
1734 
1735         t1 =(struct tcphdr *)((char *)t1 +tmp);
1736         buff->len += tmp;
1737         buff->dev = dev;
1738         memcpy(t1, th, sizeof(*t1));
1739         t1->seq = ntohl(sk->write_seq);
1740         sk->write_seq++;
1741         buff->h.seq = sk->write_seq;
1742         t1->ack = 1;
1743         t1->ack_seq = ntohl(sk->acked_seq);
1744         t1->window = ntohs(sk->window=tcp_select_window(sk));
1745         t1->fin = 1;
1746         t1->rst = 0;
1747         t1->doff = sizeof(*t1)/4;
1748         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1749 
1750         /*
1751          * If there is data in the write queue, the fin must be appended to
1752          * the write queue.
1753          */
1754         
1755         if (skb_peek(&sk->write_queue) != NULL) 
1756         {
1757                 buff->free=0;
1758                 if (buff->next != NULL) 
1759                 {
1760                         printk("tcp_shutdown: next != NULL\n");
1761                         skb_unlink(buff);
1762                 }
1763                 skb_queue_tail(&sk->write_queue, buff);
1764         } 
1765         else 
1766         {
1767                 sk->sent_seq = sk->write_seq;
1768                 sk->prot->queue_xmit(sk, dev, buff, 0);
1769         }
1770 
1771         if (sk->state == TCP_ESTABLISHED) 
1772                 tcp_set_state(sk,TCP_FIN_WAIT1);
1773         else if (sk->state == TCP_CLOSE_WAIT)
1774                 tcp_set_state(sk,TCP_LAST_ACK);
1775         else
1776                 tcp_set_state(sk,TCP_FIN_WAIT2);
1777 
1778         release_sock(sk);
1779 }
1780 
1781 
1782 static int
1783 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1784              int to_len, int nonblock, unsigned flags,
1785              struct sockaddr_in *addr, int *addr_len)
1786 {
1787         int result;
1788   
1789         /* 
1790          *      Have to check these first unlike the old code. If 
1791          *      we check them after we lose data on an error
1792          *      which is wrong 
1793          */
1794 
1795         if(addr_len)
1796                 *addr_len = sizeof(*addr);
1797         result=tcp_read(sk, to, to_len, nonblock, flags);
1798 
1799         if (result < 0) 
1800                 return(result);
1801   
1802         if(addr)
1803         {
1804                 addr->sin_family = AF_INET;
1805                 addr->sin_port = sk->dummy_th.dest;
1806                 addr->sin_addr.s_addr = sk->daddr;
1807         }
1808         return(result);
1809 }
1810 
1811 
1812 /*
1813  *      This routine will send an RST to the other tcp. 
1814  */
1815  
1816 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1817           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1818 {
1819         struct sk_buff *buff;
1820         struct tcphdr *t1;
1821         int tmp;
1822         struct device *ndev=NULL;
1823   
1824 /*
1825  * We need to grab some memory, and put together an RST,
1826  * and then put it into the queue to be sent.
1827  */
1828 
1829         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1830         if (buff == NULL) 
1831                 return;
1832 
1833         buff->len = sizeof(*t1);
1834         buff->sk = NULL;
1835         buff->dev = dev;
1836         buff->localroute = 0;
1837 
1838         t1 =(struct tcphdr *) buff->data;
1839 
1840         /*
1841          *      Put in the IP header and routing stuff. 
1842          */
1843 
1844         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1845                            sizeof(struct tcphdr),tos,ttl);
1846         if (tmp < 0) 
1847         {
1848                 buff->free = 1;
1849                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1850                 return;
1851         }
1852 
1853         t1 =(struct tcphdr *)((char *)t1 +tmp);
1854         buff->len += tmp;
1855         memcpy(t1, th, sizeof(*t1));
1856 
1857         /*
1858          *      Swap the send and the receive. 
1859          */
1860 
1861         t1->dest = th->source;
1862         t1->source = th->dest;
1863         t1->rst = 1;  
1864         t1->window = 0;
1865   
1866         if(th->ack)
1867         {
1868                 t1->ack = 0;
1869                 t1->seq = th->ack_seq;
1870                 t1->ack_seq = 0;
1871         }
1872         else
1873         {
1874                 t1->ack = 1;
1875                 if(!th->syn)
1876                         t1->ack_seq=htonl(th->seq);
1877                 else
1878                         t1->ack_seq=htonl(th->seq+1);
1879                 t1->seq=0;
1880         }
1881 
1882         t1->syn = 0;
1883         t1->urg = 0;
1884         t1->fin = 0;
1885         t1->psh = 0;
1886         t1->doff = sizeof(*t1)/4;
1887         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1888         prot->queue_xmit(NULL, dev, buff, 1);
1889         tcp_statistics.TcpOutSegs++;
1890 }
1891 
1892 
1893 /*
1894  *      Look for tcp options. Parses everything but only knows about MSS.
1895  *      This routine is always called with the packet containing the SYN.
1896  *      However it may also be called with the ack to the SYN.  So you
1897  *      can't assume this is always the SYN.  It's always called after
1898  *      we have set up sk->mtu to our own MTU.
1899  */
1900  
1901 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1902 {
1903         unsigned char *ptr;
1904         int length=(th->doff*4)-sizeof(struct tcphdr);
1905         int mss_seen = 0;
1906     
1907         ptr = (unsigned char *)(th + 1);
1908   
1909         while(length>0)
1910         {
1911                 int opcode=*ptr++;
1912                 int opsize=*ptr++;
1913                 switch(opcode)
1914                 {
1915                         case TCPOPT_EOL:
1916                                 return;
1917                         case TCPOPT_NOP:
1918                                 length-=2;
1919                                 continue;
1920                         
1921                         default:
1922                                 if(opsize<=2)   /* Avoid silly options looping forever */
1923                                         return;
1924                                 switch(opcode)
1925                                 {
1926                                         case TCPOPT_MSS:
1927                                                 if(opsize==4 && th->syn)
1928                                                 {
1929                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1930                                                         mss_seen = 1;
1931                                                 }
1932                                                 break;
1933                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1934                                 }
1935                                 ptr+=opsize-2;
1936                                 length-=opsize;
1937                 }
1938         }
1939         if (th->syn) 
1940         {
1941                 if (! mss_seen)
1942                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1943         }
1944 #ifdef CONFIG_INET_PCTCP
1945         sk->mss = min(sk->max_window >> 1, sk->mtu);
1946 #else    
1947         sk->mss = min(sk->max_window, sk->mtu);
1948 #endif  
1949 }
1950 
/*
 *	Return the classful network mask for an IP address.  Both the
 *	argument and the result are in network byte order.  Anything that
 *	is neither class A nor class B gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host = ntohl(dst);
	unsigned long net;

	if (IN_CLASSA(host))
		net = IN_CLASSA_NET;
	else if (IN_CLASSB(host))
		net = IN_CLASSB_NET;
	else
		net = IN_CLASSC_NET;

	return htonl(net);
}
1960 
1961 /*
1962  *      This routine handles a connection request.
1963  *      It should make sure we haven't already responded.
1964  *      Because of the way BSD works, we have to send a syn/ack now.
1965  *      This also means it will be harder to close a socket which is
1966  *      listening.
1967  */
1968  
1969 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1970                  unsigned long daddr, unsigned long saddr,
1971                  struct options *opt, struct device *dev)
1972 {
1973         struct sk_buff *buff;
1974         struct tcphdr *t1;
1975         unsigned char *ptr;
1976         struct sock *newsk;
1977         struct tcphdr *th;
1978         struct device *ndev=NULL;
1979         int tmp;
1980         struct rtable *rt;
1981   
1982         th = skb->h.th;
1983 
1984         /* If the socket is dead, don't accept the connection. */
1985         if (!sk->dead) 
1986         {
1987                 sk->data_ready(sk,0);
1988         }
1989         else 
1990         {
1991                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1992                 tcp_statistics.TcpAttemptFails++;
1993                 kfree_skb(skb, FREE_READ);
1994                 return;
1995         }
1996 
1997         /*
1998          * Make sure we can accept more.  This will prevent a
1999          * flurry of syns from eating up all our memory.
2000          */
2001 
2002         if (sk->ack_backlog >= sk->max_ack_backlog) 
2003         {
2004                 tcp_statistics.TcpAttemptFails++;
2005                 kfree_skb(skb, FREE_READ);
2006                 return;
2007         }
2008 
2009         /*
2010          * We need to build a new sock struct.
2011          * It is sort of bad to have a socket without an inode attached
2012          * to it, but the wake_up's will just wake up the listening socket,
2013          * and if the listening socket is destroyed before this is taken
2014          * off of the queue, this will take care of it.
2015          */
2016 
2017         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
2018         if (newsk == NULL) 
2019         {
2020                 /* just ignore the syn.  It will get retransmitted. */
2021                 tcp_statistics.TcpAttemptFails++;
2022                 kfree_skb(skb, FREE_READ);
2023                 return;
2024         }
2025 
2026         memcpy(newsk, sk, sizeof(*newsk));
2027         skb_queue_head_init(&newsk->write_queue);
2028         skb_queue_head_init(&newsk->receive_queue);
2029         newsk->send_head = NULL;
2030         newsk->send_tail = NULL;
2031         skb_queue_head_init(&newsk->back_log);
2032         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2033         newsk->rto = TCP_TIMEOUT_INIT;
2034         newsk->mdev = 0;
2035         newsk->max_window = 0;
2036         newsk->cong_window = 1;
2037         newsk->cong_count = 0;
2038         newsk->ssthresh = 0;
2039         newsk->backoff = 0;
2040         newsk->blog = 0;
2041         newsk->intr = 0;
2042         newsk->proc = 0;
2043         newsk->done = 0;
2044         newsk->partial = NULL;
2045         newsk->pair = NULL;
2046         newsk->wmem_alloc = 0;
2047         newsk->rmem_alloc = 0;
2048         newsk->localroute = sk->localroute;
2049 
2050         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2051 
2052         newsk->err = 0;
2053         newsk->shutdown = 0;
2054         newsk->ack_backlog = 0;
2055         newsk->acked_seq = skb->h.th->seq+1;
2056         newsk->fin_seq = skb->h.th->seq;
2057         newsk->copied_seq = skb->h.th->seq;
2058         newsk->state = TCP_SYN_RECV;
2059         newsk->timeout = 0;
2060         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
2061         newsk->window_seq = newsk->write_seq;
2062         newsk->rcv_ack_seq = newsk->write_seq;
2063         newsk->urg_data = 0;
2064         newsk->retransmits = 0;
2065         newsk->destroy = 0;
2066         init_timer(&newsk->timer);
2067         newsk->timer.data = (unsigned long)newsk;
2068         newsk->timer.function = &net_timer;
2069         newsk->dummy_th.source = skb->h.th->dest;
2070         newsk->dummy_th.dest = skb->h.th->source;
2071         
2072         /*
2073          *      Swap these two, they are from our point of view. 
2074          */
2075          
2076         newsk->daddr = saddr;
2077         newsk->saddr = daddr;
2078 
2079         put_sock(newsk->num,newsk);
2080         newsk->dummy_th.res1 = 0;
2081         newsk->dummy_th.doff = 6;
2082         newsk->dummy_th.fin = 0;
2083         newsk->dummy_th.syn = 0;
2084         newsk->dummy_th.rst = 0;        
2085         newsk->dummy_th.psh = 0;
2086         newsk->dummy_th.ack = 0;
2087         newsk->dummy_th.urg = 0;
2088         newsk->dummy_th.res2 = 0;
2089         newsk->acked_seq = skb->h.th->seq + 1;
2090         newsk->copied_seq = skb->h.th->seq;
2091         newsk->socket = NULL;
2092 
2093         /*
2094          *      Grab the ttl and tos values and use them 
2095          */
2096 
2097         newsk->ip_ttl=sk->ip_ttl;
2098         newsk->ip_tos=skb->ip_hdr->tos;
2099 
2100         /*
2101          *      Use 512 or whatever user asked for 
2102          */
2103 
2104         /*
2105          *      Note use of sk->user_mss, since user has no direct access to newsk 
2106          */
2107 
2108         rt=ip_rt_route(saddr, NULL,NULL);
2109         
2110         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2111                 newsk->window_clamp = rt->rt_window;
2112         else
2113                 newsk->window_clamp = 0;
2114                 
2115         if (sk->user_mss)
2116                 newsk->mtu = sk->user_mss;
2117         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2118                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2119         else 
2120         {
2121 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2122                 if ((saddr ^ daddr) & default_mask(saddr))
2123 #else
2124                 if ((saddr ^ daddr) & dev->pa_mask)
2125 #endif
2126                         newsk->mtu = 576 - HEADER_SIZE;
2127                 else
2128                         newsk->mtu = MAX_WINDOW;
2129         }
2130 
2131         /*
2132          *      But not bigger than device MTU 
2133          */
2134 
2135         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2136 
2137         /*
2138          *      This will min with what arrived in the packet 
2139          */
2140 
2141         tcp_options(newsk,skb->h.th);
2142 
2143         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2144         if (buff == NULL) 
2145         {
2146                 sk->err = -ENOMEM;
2147                 newsk->dead = 1;
2148                 release_sock(newsk);
2149                 kfree_skb(skb, FREE_READ);
2150                 tcp_statistics.TcpAttemptFails++;
2151                 return;
2152         }
2153   
2154         buff->len = sizeof(struct tcphdr)+4;
2155         buff->sk = newsk;
2156         buff->localroute = newsk->localroute;
2157 
2158         t1 =(struct tcphdr *) buff->data;
2159 
2160         /*
2161          *      Put in the IP header and routing stuff. 
2162          */
2163 
2164         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2165                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2166 
2167         /*
2168          *      Something went wrong. 
2169          */
2170 
2171         if (tmp < 0) 
2172         {
2173                 sk->err = tmp;
2174                 buff->free=1;
2175                 kfree_skb(buff,FREE_WRITE);
2176                 newsk->dead = 1;
2177                 release_sock(newsk);
2178                 skb->sk = sk;
2179                 kfree_skb(skb, FREE_READ);
2180                 tcp_statistics.TcpAttemptFails++;
2181                 return;
2182         }
2183 
2184         buff->len += tmp;
2185         t1 =(struct tcphdr *)((char *)t1 +tmp);
2186   
2187         memcpy(t1, skb->h.th, sizeof(*t1));
2188         buff->h.seq = newsk->write_seq;
2189         /*
2190          *      Swap the send and the receive. 
2191          */
2192         t1->dest = skb->h.th->source;
2193         t1->source = newsk->dummy_th.source;
2194         t1->seq = ntohl(newsk->write_seq++);
2195         t1->ack = 1;
2196         newsk->window = tcp_select_window(newsk);
2197         newsk->sent_seq = newsk->write_seq;
2198         t1->window = ntohs(newsk->window);
2199         t1->res1 = 0;
2200         t1->res2 = 0;
2201         t1->rst = 0;
2202         t1->urg = 0;
2203         t1->psh = 0;
2204         t1->syn = 1;
2205         t1->ack_seq = ntohl(skb->h.th->seq+1);
2206         t1->doff = sizeof(*t1)/4+1;
2207         ptr =(unsigned char *)(t1+1);
2208         ptr[0] = 2;
2209         ptr[1] = 4;
2210         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2211         ptr[3] =(newsk->mtu) & 0xff;
2212 
2213         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2214         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2215 
2216         reset_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
2217         skb->sk = newsk;
2218 
2219         /*
2220          *      Charge the sock_buff to newsk. 
2221          */
2222          
2223         sk->rmem_alloc -= skb->mem_len;
2224         newsk->rmem_alloc += skb->mem_len;
2225         
2226         skb_queue_tail(&sk->receive_queue,skb);
2227         sk->ack_backlog++;
2228         release_sock(newsk);
2229         tcp_statistics.TcpOutSegs++;
2230 }
2231 
2232 
2233 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2234 {
2235         struct sk_buff *buff;
2236         struct tcphdr *t1, *th;
2237         struct proto *prot;
2238         struct device *dev=NULL;
2239         int tmp;
2240 
2241         /*
2242          * We need to grab some memory, and put together a FIN, 
2243          * and then put it into the queue to be sent.
2244          */
2245         sk->inuse = 1;
2246         sk->keepopen = 1;
2247         sk->shutdown = SHUTDOWN_MASK;
2248 
2249         if (!sk->dead) 
2250                 sk->state_change(sk);
2251 
2252         if (timeout == 0) 
2253         {
2254                 /*
2255                  *  We need to flush the recv. buffs.  We do this only on the
2256                  *  descriptor close, not protocol-sourced closes, because the
2257                  *  reader process may not have drained the data yet!
2258                  */
2259 
2260                 if (skb_peek(&sk->receive_queue) != NULL) 
2261                 {
2262                         struct sk_buff *skb;
2263                         if(sk->debug)
2264                                 printk("Clean rcv queue\n");
2265                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2266                                 kfree_skb(skb, FREE_READ);
2267                         if(sk->debug)
2268                                 printk("Cleaned.\n");
2269                 }
2270         }
2271 
2272         /*
2273          *      Get rid off any half-completed packets. 
2274          */
2275          
2276         if (sk->partial) 
2277         {
2278                 tcp_send_partial(sk);
2279         }
2280 
2281         switch(sk->state) 
2282         {
2283                 case TCP_FIN_WAIT1:
2284                 case TCP_FIN_WAIT2:
2285                 case TCP_CLOSING:
2286                         /*
2287                          * These states occur when we have already closed out
2288                          * our end.  If there is no timeout, we do not do
2289                          * anything.  We may still be in the middle of sending
2290                          * the remainder of our buffer, for example...
2291                          * resetting the timer would be inappropriate.
2292                          *
2293                          * XXX if retransmit count reaches limit, is tcp_close()
2294                          * called with timeout == 1 ? if not, we need to fix that.
2295                          */
2296                         if (!timeout) {
2297                                 int timer_active;
2298 
2299                                 timer_active = del_timer(&sk->timer);
2300                                 if (timer_active)
2301                                         add_timer(&sk->timer);
2302                                 else
2303                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2304                         }
2305                         if (timeout) 
2306                                 tcp_time_wait(sk);
2307                         release_sock(sk);
2308                         return; /* break causes a double release - messy */
2309                 case TCP_TIME_WAIT:
2310                 case TCP_LAST_ACK:
2311                         /*
2312                          * A timeout from these states terminates the TCB.
2313                          */
2314                         if (timeout) 
2315                         {
2316                                 tcp_set_state(sk,TCP_CLOSE);
2317                         }
2318                         release_sock(sk);
2319                         return;
2320                 case TCP_LISTEN:
2321                         tcp_set_state(sk,TCP_CLOSE);
2322                         release_sock(sk);
2323                         return;
2324                 case TCP_CLOSE:
2325                         release_sock(sk);
2326                         return;
2327                 case TCP_CLOSE_WAIT:
2328                 case TCP_ESTABLISHED:
2329                 case TCP_SYN_SENT:
2330                 case TCP_SYN_RECV:
2331                         prot =(struct proto *)sk->prot;
2332                         th =(struct tcphdr *)&sk->dummy_th;
2333                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2334                         if (buff == NULL) 
2335                         {
2336                                 /* This will force it to try again later. */
2337                                 /* Or it would have if someone released the socket
2338                                    first. Anyway it might work now */
2339                                 release_sock(sk);
2340                                 if (sk->state != TCP_CLOSE_WAIT)
2341                                         tcp_set_state(sk,TCP_ESTABLISHED);
2342                                 reset_timer(sk, TIME_CLOSE, 100);
2343                                 return;
2344                         }
2345                         buff->sk = sk;
2346                         buff->free = 1;
2347                         buff->len = sizeof(*t1);
2348                         buff->localroute = sk->localroute;
2349                         t1 =(struct tcphdr *) buff->data;
2350         
2351                         /*
2352                          *      Put in the IP header and routing stuff. 
2353                          */
2354                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2355                                          IPPROTO_TCP, sk->opt,
2356                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2357                         if (tmp < 0) 
2358                         {
2359                                 sk->write_seq++;        /* Very important 8) */
2360                                 kfree_skb(buff,FREE_WRITE);
2361 
2362                                 /*
2363                                  * Enter FIN_WAIT1 to await completion of
2364                                  * written out data and ACK to our FIN.
2365                                  */
2366 
2367                                 if(sk->state==TCP_ESTABLISHED)
2368                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2369                                 else
2370                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2371                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2372                                 if(timeout)
2373                                         tcp_time_wait(sk);
2374 
2375                                 release_sock(sk);
2376                                 return;
2377                         }
2378 
2379                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2380                         buff->len += tmp;
2381                         buff->dev = dev;
2382                         memcpy(t1, th, sizeof(*t1));
2383                         t1->seq = ntohl(sk->write_seq);
2384                         sk->write_seq++;
2385                         buff->h.seq = sk->write_seq;
2386                         t1->ack = 1;
2387         
2388                         /* 
2389                          *      Ack everything immediately from now on. 
2390                          */
2391 
2392                         sk->delay_acks = 0;
2393                         t1->ack_seq = ntohl(sk->acked_seq);
2394                         t1->window = ntohs(sk->window=tcp_select_window(sk));
2395                         t1->fin = 1;
2396                         t1->rst = 0;
2397                         t1->doff = sizeof(*t1)/4;
2398                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2399 
2400                         tcp_statistics.TcpOutSegs++;
2401         
2402                         if (skb_peek(&sk->write_queue) == NULL) 
2403                         {
2404                                 sk->sent_seq = sk->write_seq;
2405                                 prot->queue_xmit(sk, dev, buff, 0);
2406                         } 
2407                         else 
2408                         {
2409                                 reset_timer(sk, TIME_WRITE, sk->rto);
2410                                 if (buff->next != NULL) 
2411                                 {
2412                                         printk("tcp_close: next != NULL\n");
2413                                         skb_unlink(buff);
2414                                 }
2415                                 skb_queue_tail(&sk->write_queue, buff);
2416                         }
2417 
2418                         /*
2419                          * If established (normal close), enter FIN_WAIT1.
2420                          * If in CLOSE_WAIT, enter LAST_ACK
2421                          * If in CLOSING, remain in CLOSING
2422                          * otherwise enter FIN_WAIT2
2423                          */
2424 
2425                         if (sk->state == TCP_ESTABLISHED)
2426                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2427                         else if (sk->state == TCP_CLOSE_WAIT)
2428                                 tcp_set_state(sk,TCP_LAST_ACK);
2429                         else if (sk->state != TCP_CLOSING)
2430                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2431         }
2432         release_sock(sk);
2433 }
2434 
2435 
2436 /*
2437  * Drain the head of sk->write_queue: each buffer that passes the window,
2438  * retransmit and congestion tests is unlinked and then either freed (it was
2439  * already acked) or given a fresh ack/window and handed to queue_xmit. */
2440 static void
2441 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2442 {
2443         struct sk_buff *buf;
2444         struct tcphdr *tcph;
2445         struct iphdr *ip;
2446         int tcp_len;
2447 
2448         /* Zapped socket: the bytes stay queued; closedown empties the write queue in time. */
2449         if (sk->zapped)
2450                 return;
2451 
2452         for (;;)
2453         {
2454                 buf = skb_peek(&sk->write_queue);
2455                 if (buf == NULL)
2456                         break;
2457                 /* Stop at the first buffer that is not before window_seq + 1. */
2458                 if (!before(buf->h.seq, sk->window_seq + 1))
2459                         break;
2460                 /* While retransmitting under TIME_WRITE only already-acked buffers pass. */
2461                 if (sk->retransmits != 0 && sk->timeout == TIME_WRITE &&
2462                     !before(buf->h.seq, sk->rcv_ack_seq + 1))
2463                         break;
2464                 /* Never put more packets out than cong_window allows. */
2465                 if (!(sk->packets_out < sk->cong_window))
2466                         break;
2467 
2468                 IS_SKB(buf);
2469                 skb_unlink(buf);
2470 
2471                 /* See if we really need to send the packet: acked ones are just freed. */
2472                 if (before(buf->h.seq, sk->rcv_ack_seq + 1))
2473                 {
2474                         sk->retransmits = 0;
2475                         kfree_skb(buf, FREE_WRITE);
2476                         if (!sk->dead)
2477                                 sk->write_space(sk);
2478                         continue;
2479                 }
2480 
2481                 /*
2482                  * Fill in ack_seq and window only now, so that both stay monotonic:
2483                  * they will in general have changed since the buffer was queued, and
2484                  * taking back window allocations is legal but frowned on by RFC1122.
2485                  */
2486                 ip = (struct iphdr *)(buf->data + buf->dev->hard_header_len);
2487                 tcph = (struct tcphdr *)((char *)ip + (ip->ihl << 2));
2488                 tcp_len = buf->len - ((unsigned char *)tcph - buf->data);
2489 
2490                 tcph->ack_seq = ntohl(sk->acked_seq);
2491                 tcph->window = ntohs(tcp_select_window(sk));
2492                 tcp_send_check(tcph, sk->saddr, sk->daddr, tcp_len, sk);
2493                 sk->sent_seq = buf->h.seq;
2494                 sk->prot->queue_xmit(sk, buf->dev, buf, buf->free);
2495         }
2496 }
2497 
2498 
2499 /*
2500  *      This routine deals with incoming acks, but not outgoing ones.  It reads ack_seq and window
2501  *      from th and updates sk's send window, retransmit queue, timers and connection state. */
2502 /*      Returns 1, except that some acks caught by the "Ack ignored" test below return 0.  saddr is unused. */
2503 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2504 {
2505         unsigned long ack;      /* th->ack_seq after ntohl() */
2506         int flag = 0;
2507 
2508         /* Meaning of the bits collected in flag:
2509          * 1 - there was data in packet as well as ack, or new data is sent, or 
2510          *     a LAST_ACK/FIN_WAIT1/CLOSING transition completed
2511          * 2 - data from retransmit queue was acked and removed
2512          * 4 - window shrunk or data from retransmit queue was acked and removed
2513          */
2514 
2515         if(sk->zapped)
2516                 return(1);      /* Dead, can't ack any more so why bother */
2517         /* Pick up the ack number, then track the largest window ever offered (mss follows it). */
2518         ack = ntohl(th->ack_seq);
2519         if (ntohs(th->window) > sk->max_window) 
2520         {
2521                 sk->max_window = ntohs(th->window);
2522 #ifdef CONFIG_INET_PCTCP
2523                 sk->mss = min(sk->max_window>>1, sk->mtu);
2524 #else
2525                 sk->mss = min(sk->max_window, sk->mtu);
2526 #endif  
2527         }
2528         /* An ack seen while sk->timeout is TIME_KEEPOPEN clears the retransmit count. */
2529         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2530                 sk->retransmits = 0;
2531         /* Acks after sent_seq or before rcv_ack_seq are not processed any further. */
2532         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2533         {
2534                 if(sk->debug)
2535                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2536                         
2537                 /*
2538                  *      Keepalive processing: an old ack in ESTABLISHED/CLOSE_WAIT restarts a pending keepalive timer.
2539                  */
2540                  
2541                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2542                 {
2543                         return(0);      /* ack beyond sent_seq, or old ack outside ESTABLISHED/CLOSE_WAIT */
2544                 }
2545                 if (sk->keepopen) 
2546                 {
2547                         if(sk->timeout==TIME_KEEPOPEN)
2548                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2549                 }
2550                 return(1);      /* old ack (before rcv_ack_seq) in ESTABLISHED/CLOSE_WAIT */
2551         }
2552         /* len differs from the header length (doff*4): presumably the segment carries data too. */
2553         if (len != th->doff*4) 
2554                 flag |= 1;
2555 
2556         /* See if our window has been shrunk. */
2557 
2558         if (after(sk->window_seq, ack+ntohs(th->window))) 
2559         {
2560                 /*
2561                  * We may need to move packets from the send queue
2562                  * to the write queue, if the window has been shrunk on us.
2563                  * The RFC says you are not allowed to shrink your window
2564                  * like this, but if the other end does, you must be able
2565                  * to deal with it.
2566                  */
2567                 struct sk_buff *skb;
2568                 struct sk_buff *skb2;
2569                 struct sk_buff *wskb = NULL;    /* last buffer moved back onto the write queue */
2570         
2571                 skb2 = sk->send_head;
2572                 sk->send_head = NULL;
2573                 sk->send_tail = NULL;
2574         
2575                 flag |= 4;      /* window shrunk */
2576         
2577                 sk->window_seq = ack + ntohs(th->window);
2578                 cli();
2579                 while (skb2 != NULL) 
2580                 {
2581                         skb = skb2;
2582                         skb2 = skb->link3;
2583                         skb->link3 = NULL;
2584                         if (after(skb->h.seq, sk->window_seq)) 
2585                         {
2586                                 if (sk->packets_out > 0) 
2587                                         sk->packets_out--;
2588                                 /* We may need to remove this from the dev send list. */
2589                                 if (skb->next != NULL) 
2590                                 {
2591                                         skb_unlink(skb);                                
2592                                 }
2593                                 /* Now add it to the write_queue, keeping the moved buffers in their original order. */
2594                                 if (wskb == NULL)
2595                                         skb_queue_head(&sk->write_queue,skb);
2596                                 else
2597                                         skb_append(wskb,skb);
2598                                 wskb = skb;
2599                         } 
2600                         else 
2601                         {
2602                                 if (sk->send_head == NULL) 
2603                                 {
2604                                         sk->send_head = skb;
2605                                         sk->send_tail = skb;
2606                                 }
2607                                 else
2608                                 {
2609                                         sk->send_tail->link3 = skb;
2610                                         sk->send_tail = skb;
2611                                 }
2612                                 skb->link3 = NULL;
2613                         }
2614                 }
2615                 sti();
2616         }
2617         /* If either end of the retransmit list is NULL, reset both ends and packets_out. */
2618         if (sk->send_tail == NULL || sk->send_head == NULL) 
2619         {
2620                 sk->send_head = NULL;
2621                 sk->send_tail = NULL;
2622                 sk->packets_out= 0;
2623         }
2624         /* Record the window limit carried by this ack: ack + advertised window. */
2625         sk->window_seq = ack + ntohs(th->window);
2626 
2627         /* We don't want too many packets out there. */
2628         if (sk->timeout == TIME_WRITE && 
2629                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2630         {
2631 /* 
2632  * This is Jacobson's slow start and congestion avoidance. 
2633  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2634  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2635  * counter and increment it once every cwnd times.  It's possible
2636  * that this should be done only if sk->retransmits == 0.  I'm
2637  * interpreting "new data is acked" as including data that has
2638  * been retransmitted but is just now being acked.
2639  */
2640                 if (sk->cong_window < sk->ssthresh)  
2641                   /* 
2642                    *    In "safe" area, increase
2643                    */
2644                         sk->cong_window++;
2645                 else 
2646                 {
2647                   /*
2648                    *    In dangerous area, increase slowly.  In theory this is
2649                    *    sk->cong_window += 1 / sk->cong_window
2650                    */
2651                         if (sk->cong_count >= sk->cong_window) 
2652                         {
2653                                 sk->cong_window++;
2654                                 sk->cong_count = 0;
2655                         }
2656                         else 
2657                                 sk->cong_count++;
2658                 }
2659         }
2660         /* Remember the newest ack; the tests further down compare against it. */
2661         sk->rcv_ack_seq = ack;
2662 
2663         /*
2664          * if this ack opens up a zero window, clear backoff.  It was
2665          * being used to time the probes, and is probably far higher than
2666          * it needs to be for normal retransmission.
2667          */
2668 
2669         if (sk->timeout == TIME_PROBE0) 
2670         {
2671                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2672                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2673                 {
2674                         sk->retransmits = 0;
2675                         sk->backoff = 0;
2676                   /*
2677                    *    Recompute rto from rtt.  this eliminates any backoff.
2678                    */
2679 
2680                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2681                         if (sk->rto > 120*HZ)
2682                                 sk->rto = 120*HZ;
2683                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2684                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2685                                                    .2 of a second is going to need huge windows (SIGH) */
2686                                 sk->rto = 20;
2687                 }
2688         }
2689 
2690   /* 
2691    *    See if we can take anything off of the retransmit queue.
2692    */
2693    
2694         while(sk->send_head != NULL) 
2695         {
2696                 /* Check for a bug: the retransmit list must be in sequence order. */
2697                 if (sk->send_head->link3 &&
2698                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2699                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2700                 if (before(sk->send_head->h.seq, ack+1)) 
2701                 {
2702                         struct sk_buff *oskb;   
2703                         if (sk->retransmits) 
2704                         {       
2705                                 /*
2706                                  *      We were retransmitting.  don't count this in RTT est 
2707                                  */
2708                                 flag |= 2;
2709 
2710                                 /*
2711                                  * even though we've gotten an ack, we're still
2712                                  * retransmitting as long as we're sending from
2713                                  * the retransmit queue.  Keeping retransmits non-zero
2714                                  * prevents us from getting new data interspersed with
2715                                  * retransmissions.
2716                                  */
2717 
2718                                 if (sk->send_head->link3)
2719                                         sk->retransmits = 1;
2720                                 else
2721                                         sk->retransmits = 0;
2722                         }
2723                         /*
2724                          * Note that we only reset backoff and rto in the
2725                          * rtt recomputation code.  And that doesn't happen
2726                          * if there were retransmissions in effect.  So the
2727                          * first new packet after the retransmissions is
2728                          * sent with the backoff still in effect.  Not until
2729                          * we get an ack from a non-retransmitted packet do
2730                          * we reset the backoff and rto.  This allows us to deal
2731                          * with a situation where the network delay has increased
2732                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2733                          */
2734 
2735                         /*
2736                          *      We have one less packet out there. 
2737                          */
2738                          
2739                         if (sk->packets_out > 0) 
2740                                 sk->packets_out --;
2741                         /* 
2742                          *      Wake up the process, it can probably write more. 
2743                          */
2744                         if (!sk->dead) 
2745                                 sk->write_space(sk);
2746                         oskb = sk->send_head;
2747 
2748                         if (!(flag&2))  /* skipped once bit 2 is set, i.e. after the first buffer removed by this ack */
2749                         {
2750                                 long m;
2751         
2752                                 /*
2753                                  *      The following amusing code comes from Jacobson's
2754                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2755                                  *      are scaled versions of rtt and mean deviation.
2756                                  *      This is designed to be as fast as possible 
2757                                  *      m stands for "measurement".
2758                                  */
2759         
2760                                 m = jiffies - oskb->when;  /* RTT */
2761                                 if(m<=0)
2762                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2763                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2764                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2765                                 if (m < 0)
2766                                         m = -m;         /* m is now abs(error) */
2767                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2768                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2769         
2770                                 /*
2771                                  *      Now update timeout.  Note that this removes any backoff.
2772                                  */
2773                          
2774                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2775                                 if (sk->rto > 120*HZ)
2776                                         sk->rto = 120*HZ;
2777                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2778                                         sk->rto = 20;
2779                                 sk->backoff = 0;
2780                         }
2781                         flag |= (2|4);  /* a buffer is about to be removed from the retransmit queue */
2782                         cli();
2783                         oskb = sk->send_head;
2784                         IS_SKB(oskb);
2785                         sk->send_head = oskb->link3;
2786                         if (sk->send_head == NULL) 
2787                         {
2788                                 sk->send_tail = NULL;
2789                         }
2790 
2791                 /*
2792                  *      We may need to remove this from the dev send list. 
2793                  */
2794 
2795                         if (oskb->next)
2796                                 skb_unlink(oskb);
2797                         sti();
2798                         kfree_skb(oskb, FREE_WRITE); /* write. */
2799                         if (!sk->dead) 
2800                                 sk->write_space(sk);
2801                 }
2802                 else
2803                 {
2804                         break;  /* head of the list is not yet acked: stop here */
2805                 }
2806         }
2807 
2808         /*
2809          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2810          * returns non-NULL, we completely ignore the timer stuff in the else
2811          * clause.  We ought to organize the code so that else clause can
2812          * (should) be executed regardless, possibly moving the PROBE timer
2813          * reset over.  The skb_peek() thing should only move stuff to the
2814          * write queue, NOT also manage the timer functions.
2815          */
2816 
2817         /*
2818          * Maybe we can take some stuff off of the write queue,
2819          * and put it onto the xmit queue.
2820          */
2821         if (skb_peek(&sk->write_queue) != NULL) 
2822         {
2823                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2824                         (sk->retransmits == 0 || 
2825                          sk->timeout != TIME_WRITE ||
2826                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2827                         && sk->packets_out < sk->cong_window) 
2828                 {
2829                         flag |= 1;      /* new data is sent */
2830                         tcp_write_xmit(sk);
2831                 }
2832                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2833                         sk->send_head == NULL &&
2834                         sk->ack_backlog == 0 &&
2835                         sk->state != TCP_TIME_WAIT) 
2836                 {
2837                         reset_timer(sk, TIME_PROBE0, sk->rto);  /* head of write queue lies beyond the window, nothing else pending */
2838                 }               
2839         }
2840         else
2841         {
2842                 /*
2843                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2844                  * from TCP_CLOSE we don't do anything
2845                  *
2846                  * from anything else, if there is write data (or fin) pending,
2847                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2848                  * a KEEPALIVE timeout, else we delete the timer.
2849                  *
2850                  * We do not set flag for nominal write data, otherwise we may
2851                  * force a state where we start to write itsy bitsy tidbits
2852                  * of data.
2853                  */
2854 
2855                 switch(sk->state) {
2856                 case TCP_TIME_WAIT:
2857                         /*
2858                          * keep us in TIME_WAIT until we stop getting packets,
2859                          * reset the timeout.
2860                          */
2861                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2862                         break;
2863                 case TCP_CLOSE:
2864                         /*
2865                          * don't touch the timer.
2866                          */
2867                         break;
2868                 default:
2869                         /*
2870                          * must check send_head, write_queue, and ack_backlog
2871                          * to determine which timeout to use.
2872                          */
2873                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2874                                 reset_timer(sk, TIME_WRITE, sk->rto);
2875                         } else if (sk->keepopen) {
2876                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2877                         } else {
2878                                 delete_timer(sk);
2879                         }
2880                         break;
2881                 }
2882 #ifdef NOTDEF
2883                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2884                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2885                 {
2886                         if (!sk->dead)
2887                                 sk->write_space(sk);
2888                         if (sk->keepopen) {
2889                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2890                         } else {
2891                                 delete_timer(sk);
2892                         }
2893                 }
2894                 else
2895                 {
2896                         if (sk->state != (unsigned char) sk->keepopen) 
2897                         {
2898                                 reset_timer(sk, TIME_WRITE, sk->rto);
2899                         }
2900                         if (sk->state == TCP_TIME_WAIT) 
2901                         {
2902                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2903                         }       
2904                 }
2905 #endif
2906         }
2907         /* Nothing in flight and nothing queued: push out a pending partial packet, if there is one. */
2908         if (sk->packets_out == 0 && sk->partial != NULL &&
2909                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2910         {
2911                 flag |= 1;
2912                 tcp_send_partial(sk);
2913         }
2914 
2915         /*
2916          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2917          * we are now waiting for an acknowledge to our FIN.  The other end is
2918          * already in TIME_WAIT.
2919          *
2920          * Move to TCP_CLOSE on success.
2921          */
2922 
2923         if (sk->state == TCP_LAST_ACK) 
2924         {
2925                 if (!sk->dead)
2926                         sk->state_change(sk);
2927                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2928                 {
2929                         flag |= 1;
2930                         tcp_set_state(sk,TCP_CLOSE);
2931                         sk->shutdown = SHUTDOWN_MASK;
2932                 }
2933         }
2934 
2935         /*
2936          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2937          *
2938          * Move to FIN_WAIT2 to await a FIN from the other end. Set
2939          * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
2940          */
2941 
2942         if (sk->state == TCP_FIN_WAIT1) 
2943         {
2944 
2945                 if (!sk->dead) 
2946                         sk->state_change(sk);
2947                 if (sk->rcv_ack_seq == sk->write_seq) 
2948                 {
2949                         flag |= 1;
2950                         sk->shutdown |= SEND_SHUTDOWN;
2951                         tcp_set_state(sk,TCP_FIN_WAIT2);
2952                 }
2953         }
2954 
2955         /*
2956          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2957          *
2958          *      Move to TIME_WAIT
2959          */
2960 
2961         if (sk->state == TCP_CLOSING) 
2962         {
2963 
2964                 if (!sk->dead) 
2965                         sk->state_change(sk);
2966                 if (sk->rcv_ack_seq == sk->write_seq) 
2967                 {
2968                         flag |= 1;
2969                         tcp_time_wait(sk);
2970                 }
2971         }
2972 
2973         /*
2974          * I make no guarantees about the first clause in the following
2975          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2976          * what conditions "!flag" would be true.  However I think the rest
2977          * of the conditions would prevent that from causing any
2978          * unnecessary retransmission. 
2979          *   Clearly if the first packet has expired it should be 
2980          * retransmitted.  The other alternative, "flag&2 && retransmits", is
2981          * harder to explain:  You have to look carefully at how and when the
2982          * timer is set and with what timeout.  The most recent transmission always
2983          * sets the timer.  So in general if the most recent thing has timed
2984          * out, everything before it has as well.  So we want to go ahead and
2985          * retransmit some more.  If we didn't explicitly test for this
2986          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2987          * would not be true.  If you look at the pattern of timing, you can
2988          * show that rto is increased fast enough that the next packet would
2989          * almost never be retransmitted immediately.  Then you'd end up
2990          * waiting for a timeout to send each packet on the retransmission
2991          * queue.  With my implementation of the Karn sampling algorithm,
2992          * the timeout would double each time.  The net result is that it would
2993          * take a hideous amount of time to recover from a single dropped packet.
2994          * It's possible that there should also be a test for TIME_WRITE, but
2995          * I think as long as "send_head != NULL" and "retransmit" is on, we've
2996          * got to be in real retransmission mode.
2997          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2998          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2999          * As long as no further losses occur, this seems reasonable.
3000          */
3001         
3002         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3003                (((flag&2) && sk->retransmits) ||
3004                (sk->send_head->when + sk->rto < jiffies))) 
3005         {
3006                 ip_do_retransmit(sk, 1);
3007                 reset_timer(sk, TIME_WRITE, sk->rto);
3008         }
3009 
3010         return(1);
3011 }
3012 
3013 
3014 /*
3015  *      This routine handles the data.  If there is room in the buffer,
3016  *      it will have already been moved into it.  If there is no
3017  *      room, then we will just have to discard the packet.
3018  */
3019 
3020 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
3021          unsigned long saddr, unsigned short len)
3022 {
3023         struct sk_buff *skb1, *skb2;
3024         struct tcphdr *th;
3025         int dup_dumped=0;
3026         unsigned long new_seq;
3027 
3028         th = skb->h.th;
3029         skb->len = len -(th->doff*4);
3030 
3031         /* The bytes in the receive read/assembly queue has increased. Needed for the
3032            low memory discard algorithm */
3033            
3034         sk->bytes_rcv += skb->len;
3035         
3036         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3037         {
3038                 /* 
3039                  *      Don't want to keep passing ack's back and forth. 
3040                  *      (someone sent us dataless, boring frame)
3041                  */
3042                 if (!th->ack)
3043                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3044                 kfree_skb(skb, FREE_READ);
3045                 return(0);
3046         }
3047         
3048         /*
3049          *      We no longer have anyone receiving data on this connection.
3050          */
3051 
3052         if(sk->shutdown & RCV_SHUTDOWN)
3053         {
3054                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3055                 
3056                 if(after(new_seq,sk->acked_seq+1))      /* If the right edge of this frame is after the last copied byte
3057                                                            then it contains data we will never touch. We send an RST to 
3058                                                            ensure the far end knows it never got to the application */
3059                 {
3060                         sk->acked_seq = new_seq + th->fin;
3061                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3062                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3063                         tcp_statistics.TcpEstabResets++;
3064                         tcp_set_state(sk,TCP_CLOSE);
3065                         sk->err = EPIPE;
3066                         sk->shutdown = SHUTDOWN_MASK;
3067                         kfree_skb(skb, FREE_READ);
3068                         if (!sk->dead)
3069                                 sk->state_change(sk);
3070                         return(0);
3071                 }
3072         }
3073         /*
3074          *      Now we have to walk the chain, and figure out where this one
3075          *      goes into it.  This is set up so that the last packet we received
3076          *      will be the first one we look at, that way if everything comes
3077          *      in order, there will be no performance loss, and if they come
3078          *      out of order we will be able to fit things in nicely.
3079          */
3080 
3081         /* 
3082          *      This should start at the last one, and then go around forwards.
3083          */
3084 
3085         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3086         {
3087                 skb_queue_head(&sk->receive_queue,skb);
3088                 skb1= NULL;
3089         } 
3090         else
3091         {
3092                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3093                 {
3094                         if(sk->debug)
3095                         {
3096                                 printk("skb1=%p :", skb1);
3097                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3098                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3099                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3100                                                 sk->acked_seq);
3101                         }
3102                         
3103                         /*
3104                          *      Optimisation: Duplicate frame or extension of previous frame from
3105                          *      same sequence point (lost ack case).
3106                          *      The frame contains duplicate data or replaces a previous frame
3107                          *      discard the previous frame (safe as sk->inuse is set) and put
3108                          *      the new one in its place.
3109                          */
3110                          
3111                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3112                         {
3113                                 skb_append(skb1,skb);
3114                                 skb_unlink(skb1);
3115                                 kfree_skb(skb1,FREE_READ);
3116                                 dup_dumped=1;
3117                                 skb1=NULL;
3118                                 break;
3119                         }
3120                         
3121                         /*
3122                          *      Found where it fits
3123                          */
3124                          
3125                         if (after(th->seq+1, skb1->h.th->seq))
3126                         {
3127                                 skb_append(skb1,skb);
3128                                 break;
3129                         }
3130                         
3131                         /*
3132                          *      See if we've hit the start. If so insert.
3133                          */
3134                         if (skb1 == skb_peek(&sk->receive_queue))
3135                         {
3136                                 skb_queue_head(&sk->receive_queue, skb);
3137                                 break;
3138                         }
3139                 }
3140         }
3141 
3142         /*
3143          *      Figure out what the ack value for this frame is
3144          */
3145          
3146         th->ack_seq = th->seq + skb->len;
3147         if (th->syn) 
3148                 th->ack_seq++;
3149         if (th->fin)
3150                 th->ack_seq++;
3151 
3152         if (before(sk->acked_seq, sk->copied_seq)) 
3153         {
3154                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3155                 sk->acked_seq = sk->copied_seq;
3156         }
3157 
3158         /*
3159          *      Now figure out if we can ack anything.
3160          */
3161 
3162         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3163         {
3164                 if (before(th->seq, sk->acked_seq+1)) 
3165                 {
3166                         int newwindow;
3167 
3168                         if (after(th->ack_seq, sk->acked_seq)) 
3169                         {
3170                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3171                                 if (newwindow < 0)
3172                                         newwindow = 0;  
3173                                 sk->window = newwindow;
3174                                 sk->acked_seq = th->ack_seq;
3175                         }
3176                         skb->acked = 1;
3177 
3178                         /* 
3179                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3180                          */
3181 
3182                         if (skb->h.th->fin) 
3183                         {
3184                                 if (!sk->dead) 
3185                                         sk->state_change(sk);
3186                                 sk->shutdown |= RCV_SHUTDOWN;
3187                         }
3188           
3189                         for(skb2 = skb->next;
3190                             skb2 != (struct sk_buff *)&sk->receive_queue;
3191                             skb2 = skb2->next) 
3192                         {
3193                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3194                                 {
3195                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3196                                         {
3197                                                 newwindow = sk->window -
3198                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3199                                                 if (newwindow < 0)
3200                                                         newwindow = 0;  
3201                                                 sk->window = newwindow;
3202                                                 sk->acked_seq = skb2->h.th->ack_seq;
3203                                         }
3204                                         skb2->acked = 1;
3205                                         /*
3206                                          *      When we ack the fin, we turn on
3207                                          *      the RCV_SHUTDOWN flag.
3208                                          */
3209                                         if (skb2->h.th->fin) 
3210                                         {
3211                                                 sk->shutdown |= RCV_SHUTDOWN;
3212                                                 if (!sk->dead)
3213                                                         sk->state_change(sk);
3214                                         }
3215 
3216                                         /*
3217                                          *      Force an immediate ack.
3218                                          */
3219                                          
3220                                         sk->ack_backlog = sk->max_ack_backlog;
3221                                 }
3222                                 else
3223                                 {
3224                                         break;
3225                                 }
3226                         }
3227 
3228                         /*
3229                          *      This also takes care of updating the window.
3230                          *      This if statement needs to be simplified.
3231                          */
3232                         if (!sk->delay_acks ||
3233                             sk->ack_backlog >= sk->max_ack_backlog || 
3234                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3235         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3236                         }
3237                         else 
3238                         {
3239                                 sk->ack_backlog++;
3240                                 if(sk->debug)
3241                                         printk("Ack queued.\n");
3242                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3243                         }
3244                 }
3245         }
3246 
3247         /*
3248          *      If we've missed a packet, send an ack.
3249          *      Also start a timer to send another.
3250          */
3251          
3252         if (!skb->acked) 
3253         {
3254         
3255         /*
3256          *      This is important.  If we don't have much room left,
3257          *      we need to throw out a few packets so we have a good
3258          *      window.  Note that mtu is used, not mss, because mss is really
3259          *      for the send side.  He could be sending us stuff as large as mtu.
3260          */
3261                  
3262                 while (sk->prot->rspace(sk) < sk->mtu) 
3263                 {
3264                         skb1 = skb_peek(&sk->receive_queue);
3265                         if (skb1 == NULL) 
3266                         {
3267                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3268                                 break;
3269                         }
3270 
3271                         /*
3272                          *      Don't throw out something that has been acked. 
3273                          */
3274                  
3275                         if (skb1->acked) 
3276                         {
3277                                 break;
3278                         }
3279                 
3280                         skb_unlink(skb1);
3281                         kfree_skb(skb1, FREE_READ);
3282                 }
3283                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3284                 sk->ack_backlog++;
3285                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3286         }
3287         else
3288         {
3289                 /* We missed a packet.  Send an ack to try to resync things. */
3290                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3291         }
3292 
3293         /*
3294          *      Now tell the user we may have some data. 
3295          */
3296          
3297         if (!sk->dead) 
3298         {
3299                 if(sk->debug)
3300                         printk("Data wakeup.\n");
3301                 sk->data_ready(sk,0);
3302         } 
3303         return(0);
3304 }
3305 
3306 
3307 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3308 {
3309         unsigned long ptr = ntohs(th->urg_ptr);
3310 
3311         if (ptr)
3312                 ptr--;
3313         ptr += th->seq;
3314 
3315         /* ignore urgent data that we've already seen and read */
3316         if (after(sk->copied_seq+1, ptr))
3317                 return;
3318 
3319         /* do we already have a newer (or duplicate) urgent pointer? */
3320         if (sk->urg_data && !after(ptr, sk->urg_seq))
3321                 return;
3322 
3323         /* tell the world about our new urgent pointer */
3324         if (sk->proc != 0) {
3325                 if (sk->proc > 0) {
3326                         kill_proc(sk->proc, SIGURG, 1);
3327                 } else {
3328                         kill_pg(-sk->proc, SIGURG, 1);
3329                 }
3330         }
3331         sk->urg_data = URG_NOTYET;
3332         sk->urg_seq = ptr;
3333 }
3334 
3335 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3336         unsigned long saddr, unsigned long len)
3337 {
3338         unsigned long ptr;
3339 
3340         /* check if we get a new urgent pointer */
3341         if (th->urg)
3342                 tcp_check_urg(sk,th);
3343 
3344         /* do we wait for any urgent data? */
3345         if (sk->urg_data != URG_NOTYET)
3346                 return 0;
3347 
3348         /* is the urgent pointer pointing into this packet? */
3349         ptr = sk->urg_seq - th->seq + th->doff*4;
3350         if (ptr >= len)
3351                 return 0;
3352 
3353         /* ok, got the correct packet, update info */
3354         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3355         if (!sk->dead)
3356                 sk->data_ready(sk,0);
3357         return 0;
3358 }
3359 
3360 
3361 /*
3362  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3363  *
3364  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3365  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3366  *  TIME-WAIT)
3367  *
3368  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3369  *  close and we go into CLOSING (and later onto TIME-WAIT)
3370  *
3371  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3372  *
3373  */
3374 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3375          unsigned long saddr, struct device *dev)
3376 {
3377         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3378 
3379         if (!sk->dead) 
3380         {
3381                 sk->state_change(sk);
3382         }
3383 
3384         switch(sk->state) 
3385         {
3386                 case TCP_SYN_RECV:
3387                 case TCP_SYN_SENT:
3388                 case TCP_ESTABLISHED:
3389                         /*
3390                          * move to CLOSE_WAIT, tcp_data() already handled
3391                          * sending the ack.
3392                          */
3393                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3394                         tcp_set_state(sk,TCP_CLOSE_WAIT);
3395                         if (th->rst)
3396                                 sk->shutdown = SHUTDOWN_MASK;
3397                         break;
3398 
3399                 case TCP_CLOSE_WAIT:
3400                 case TCP_CLOSING:
3401                         /*
3402                          * received a retransmission of the FIN, do
3403                          * nothing.
3404                          */
3405                         break;
3406                 case TCP_TIME_WAIT:
3407                         /*
3408                          * received a retransmission of the FIN,
3409                          * restart the TIME_WAIT timer.
3410                          */
3411                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3412                         return(0);
3413                 case TCP_FIN_WAIT1:
3414                         /*
3415                          * This case occurs when a simultaneous close
3416                          * happens, we must ack the received FIN and
3417                          * enter the CLOSING state.
3418                          *
3419                          * XXX timeout not set properly
3420                          */
3421 
3422                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3423                         tcp_set_state(sk,TCP_CLOSING);
3424                         break;
3425                 case TCP_FIN_WAIT2:
3426                         /*
3427                          * received a FIN -- send ACK and enter TIME_WAIT
3428                          */
3429                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3430                         sk->shutdown|=SHUTDOWN_MASK;
3431                         tcp_set_state(sk,TCP_TIME_WAIT);
3432                         break;
3433                 case TCP_CLOSE:
3434                         /*
3435                          * already in CLOSE
3436                          */
3437                         break;
3438                 default:
3439                         tcp_set_state(sk,TCP_LAST_ACK);
3440         
3441                         /* Start the timers. */
3442                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3443                         return(0);
3444         }
3445         sk->ack_backlog++;
3446 
3447         return(0);
3448 }
3449 
3450 
3451 /* This will accept the next outstanding connection. */
3452 static struct sock *
3453 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3454 {
3455         struct sock *newsk;
3456         struct sk_buff *skb;
3457   
3458   /*
3459    * We need to make sure that this socket is listening,
3460    * and that it has something pending.
3461    */
3462 
3463         if (sk->state != TCP_LISTEN) 
3464         {
3465                 sk->err = EINVAL;
3466                 return(NULL); 
3467         }
3468 
3469         /* Avoid the race. */
3470         cli();
3471         sk->inuse = 1;
3472 
3473         while((skb = tcp_dequeue_established(sk)) == NULL) 
3474         {
3475                 if (flags & O_NONBLOCK) 
3476                 {
3477                         sti();
3478                         release_sock(sk);
3479                         sk->err = EAGAIN;
3480                         return(NULL);
3481                 }
3482 
3483                 release_sock(sk);
3484                 interruptible_sleep_on(sk->sleep);
3485                 if (current->signal & ~current->blocked) 
3486                 {
3487                         sti();
3488                         sk->err = ERESTARTSYS;
3489                         return(NULL);
3490                 }
3491                 sk->inuse = 1;
3492         }
3493         sti();
3494 
3495         /*
3496          *      Now all we need to do is return skb->sk. 
3497          */
3498 
3499         newsk = skb->sk;
3500 
3501         kfree_skb(skb, FREE_READ);
3502         sk->ack_backlog--;
3503         release_sock(sk);
3504         return(newsk);
3505 }
3506 
3507 
3508 /*
3509  *      This will initiate an outgoing connection. 
3510  */
3511  
3512 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3513 {
3514         struct sk_buff *buff;
3515         struct device *dev=NULL;
3516         unsigned char *ptr;
3517         int tmp;
3518         struct tcphdr *t1;
3519         struct rtable *rt;
3520 
3521         if (sk->state != TCP_CLOSE) 
3522                 return(-EISCONN);
3523 
3524         if (addr_len < 8) 
3525                 return(-EINVAL);
3526 
3527         if (usin->sin_family && usin->sin_family != AF_INET) 
3528                 return(-EAFNOSUPPORT);
3529 
3530         /*
3531          *      connect() to INADDR_ANY means loopback (BSD'ism).
3532          */
3533         
3534         if(usin->sin_addr.s_addr==INADDR_ANY)
3535                 usin->sin_addr.s_addr=ip_my_addr();
3536                   
3537         /*
3538          *      Don't want a TCP connection going to a broadcast address 
3539          */
3540 
3541         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3542         { 
3543                 return -ENETUNREACH;
3544         }
3545   
3546         /*
3547          *      Connect back to the same socket: Blows up so disallow it 
3548          */
3549 
3550         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3551                 return -EBUSY;
3552 
3553         sk->inuse = 1;
3554         sk->daddr = usin->sin_addr.s_addr;
3555         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3556         sk->window_seq = sk->write_seq;
3557         sk->rcv_ack_seq = sk->write_seq -1;
3558         sk->err = 0;
3559         sk->dummy_th.dest = usin->sin_port;
3560         release_sock(sk);
3561 
3562         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3563         if (buff == NULL) 
3564         {
3565                 return(-ENOMEM);
3566         }
3567         sk->inuse = 1;
3568         buff->len = 24;
3569         buff->sk = sk;
3570         buff->free = 1;
3571         buff->localroute = sk->localroute;
3572         
3573         t1 = (struct tcphdr *) buff->data;
3574 
3575         /*
3576          *      Put in the IP header and routing stuff. 
3577          */
3578          
3579         rt=ip_rt_route(sk->daddr, NULL, NULL);
3580         
3581 
3582         /*
3583          *      We need to build the routing stuff from the things saved in skb. 
3584          */
3585 
3586         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3587                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3588         if (tmp < 0) 
3589         {
3590                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3591                 release_sock(sk);
3592                 return(-ENETUNREACH);
3593         }
3594 
3595         buff->len += tmp;
3596         t1 = (struct tcphdr *)((char *)t1 +tmp);
3597 
3598         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3599         t1->seq = ntohl(sk->write_seq++);
3600         sk->sent_seq = sk->write_seq;
3601         buff->h.seq = sk->write_seq;
3602         t1->ack = 0;
3603         t1->window = 2;
3604         t1->res1=0;
3605         t1->res2=0;
3606         t1->rst = 0;
3607         t1->urg = 0;
3608         t1->psh = 0;
3609         t1->syn = 1;
3610         t1->urg_ptr = 0;
3611         t1->doff = 6;
3612         /* use 512 or whatever user asked for */
3613         
3614         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3615                 sk->window_clamp=rt->rt_window;
3616         else
3617                 sk->window_clamp=0;
3618 
3619         if (sk->user_mss)
3620                 sk->mtu = sk->user_mss;
3621         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3622                 sk->mtu = rt->rt_mss;
3623         else 
3624         {
3625 #ifdef CONFIG_INET_SNARL
3626                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3627 #else
3628                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3629 #endif
3630                         sk->mtu = 576 - HEADER_SIZE;
3631                 else
3632                         sk->mtu = MAX_WINDOW;
3633         }
3634         /*
3635          *      but not bigger than device MTU 
3636          */
3637 
3638         if(sk->mtu <32)
3639                 sk->mtu = 32;   /* Sanity limit */
3640                 
3641         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3642         
3643         /*
3644          *      Put in the TCP options to say MTU. 
3645          */
3646 
3647         ptr = (unsigned char *)(t1+1);
3648         ptr[0] = 2;
3649         ptr[1] = 4;
3650         ptr[2] = (sk->mtu) >> 8;
3651         ptr[3] = (sk->mtu) & 0xff;
3652         tcp_send_check(t1, sk->saddr, sk->daddr,
3653                   sizeof(struct tcphdr) + 4, sk);
3654 
3655         /*
3656          *      This must go first otherwise a really quick response will get reset. 
3657          */
3658 
3659         tcp_set_state(sk,TCP_SYN_SENT);
3660         sk->rto = TCP_TIMEOUT_INIT;
3661         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3662         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3663 
3664         sk->prot->queue_xmit(sk, dev, buff, 0);  
3665         tcp_statistics.TcpActiveOpens++;
3666         tcp_statistics.TcpOutSegs++;
3667   
3668         release_sock(sk);
3669         return(0);
3670 }
3671 
3672 
3673 /* This functions checks to see if the tcp header is actually acceptable. */
3674 static int
3675 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3676              struct options *opt, unsigned long saddr, struct device *dev)
3677 {
3678         unsigned long next_seq;
3679 
3680         next_seq = len - 4*th->doff;
3681         if (th->fin)
3682                 next_seq++;
3683         /* if we have a zero window, we can't have any data in the packet.. */
3684         if (next_seq && !sk->window)
3685                 goto ignore_it;
3686         next_seq += th->seq;
3687 
3688         /*
3689          * This isn't quite right.  sk->acked_seq could be more recent
3690          * than sk->window.  This is however close enough.  We will accept
3691          * slightly more packets than we should, but it should not cause
3692          * problems unless someone is trying to forge packets.
3693          */
3694 
3695         /* have we already seen all of this packet? */
3696         if (!after(next_seq+1, sk->acked_seq))
3697                 goto ignore_it;
3698         /* or does it start beyond the window? */
3699         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3700                 goto ignore_it;
3701 
3702         /* ok, at least part of this packet would seem interesting.. */
3703         return 1;
3704 
3705 ignore_it:
3706         if (th->rst)
3707                 return 0;
3708 
3709         /*
3710          *      Send a reset if we get something not ours and we are
3711          *      unsynchronized. Note: We don't do anything to our end. We
3712          *      are just killing the bogus remote connection then we will
3713          *      connect again and it will work (with luck).
3714          */
3715          
3716         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3717                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3718                 return 1;
3719         }
3720 
3721         /* Try to resync things. */
3722         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3723         return 0;
3724 }
3725 
3726 
3727 #ifdef TCP_FASTPATH
3728 /*
3729  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3730  *      Yes if
3731  *      a) The queue is empty
3732  *      b) The last frame on the queue has the acked flag set
3733  */
3734 
3735 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3736 {
3737         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3738         if(skb==NULL || sk->receive_queue.prev->acked)
3739                 return 1;
3740 }
3741 
3742 #endif
3743 
3744 int
3745 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3746         unsigned long daddr, unsigned short len,
3747         unsigned long saddr, int redo, struct inet_protocol * protocol)
3748 {
3749         struct tcphdr *th;
3750         struct sock *sk;
3751 
3752         if (!skb) 
3753         {
3754                 return(0);
3755         }
3756 
3757         if (!dev) 
3758         {
3759                 return(0);
3760         }
3761   
3762         tcp_statistics.TcpInSegs++;
3763   
3764         if(skb->pkt_type!=PACKET_HOST)
3765         {
3766                 kfree_skb(skb,FREE_READ);
3767                 return(0);
3768         }
3769   
3770         th = skb->h.th;
3771 
3772         /*
3773          *      Find the socket.
3774          */
3775 
3776         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3777 
3778         /*
3779          *      If this socket has got a reset its to all intents and purposes 
3780          *      really dead 
3781          */
3782          
3783         if (sk!=NULL && sk->zapped)
3784                 sk=NULL;
3785 
3786         if (!redo) 
3787         {
3788                 if (tcp_check(th, len, saddr, daddr )) 
3789                 {
3790                         skb->sk = NULL;
3791                         kfree_skb(skb,FREE_READ);
3792                         /*
3793                          * We don't release the socket because it was
3794                          * never marked in use.
3795                          */
3796                         return(0);
3797                 }
3798                 th->seq = ntohl(th->seq);
3799 
3800                 /* See if we know about the socket. */
3801                 if (sk == NULL) 
3802                 {
3803                         if (!th->rst)
3804                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3805                         skb->sk = NULL;
3806                         kfree_skb(skb, FREE_READ);
3807                         return(0);
3808                 }
3809 
3810                 skb->len = len;
3811                 skb->sk = sk;
3812                 skb->acked = 0;
3813                 skb->used = 0;
3814                 skb->free = 0;
3815                 skb->saddr = daddr;
3816                 skb->daddr = saddr;
3817         
3818                 /* We may need to add it to the backlog here. */
3819                 cli();
3820                 if (sk->inuse) 
3821                 {
3822                         skb_queue_head(&sk->back_log, skb);
3823                         sti();
3824                         return(0);
3825                 }
3826                 sk->inuse = 1;
3827                 sti();
3828         }
3829         else
3830         {
3831                 if (!sk) 
3832                 {
3833                         return(0);
3834                 }
3835         }
3836 
3837 
3838         if (!sk->prot) 
3839         {
3840                 return(0);
3841         }
3842 
3843 
3844         /*
3845          *      Charge the memory to the socket. 
3846          */
3847          
3848         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3849         {
3850                 skb->sk = NULL;
3851                 kfree_skb(skb, FREE_READ);
3852                 release_sock(sk);
3853                 return(0);
3854         }
3855 
3856         sk->rmem_alloc += skb->mem_len;
3857 
3858 #ifdef TCP_FASTPATH
3859 /*
3860  *      Incoming data stream fastpath. 
3861  *
3862  *      We try to optimise two things.
3863  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3864  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3865  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3866  *
3867  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3868  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3869  *      speed although further optimizing here is possible.
3870  */
3871  
3872         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3873         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3874         {       
3875                 /* Packets in order. Fits window */
3876                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3877                 {
3878                         /* Ack is harder */
3879                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3880                         {
3881                                 kfree_skb(skb, FREE_READ);
3882                                 release_sock(sk);
3883                                 return 0;
3884                         }
3885                         /*
3886                          *      Set up variables
3887                          */
3888                         skb->len -= (th->doff *4);
3889                         sk->bytes_rcv += skb->len;
3890                         tcp_rx_hit2++;
3891                         if(skb->len)
3892                         {
3893                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3894                                 if(sk->window >= skb->len)
3895                                         sk->window-=skb->len;                   /* We know its effect on the window */
3896                                 else
3897                                         sk->window=0;
3898                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3899                                 skb->acked=1;                           /* Guaranteed true */
3900                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3901                                         sk->bytes_rcv > sk->max_unacked)
3902                                 {
3903                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3904                                 }
3905                                 else
3906                                 {
3907                                         sk->ack_backlog++;
3908                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3909                                 }
3910                                 if(!sk->dead)
3911                                         sk->data_ready(sk,0);
3912                                 release_sock(sk);
3913                                 return 0;
3914                         }
3915                 }
3916                 /*
3917                  *      More generic case of arriving data stream in ESTABLISHED
3918                  */
3919                 tcp_rx_hit1++;
3920                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3921                 {
3922                         kfree_skb(skb, FREE_READ);
3923                         release_sock(sk);
3924                         return 0;
3925                 }
3926                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3927                 {
3928                         kfree_skb(skb, FREE_READ);
3929                         release_sock(sk);
3930                         return 0;
3931                 }
3932                 if(tcp_data(skb, sk, saddr, len))
3933                         kfree_skb(skb, FREE_READ);
3934                 release_sock(sk);
3935                 return 0;
3936         }
3937         tcp_rx_miss++;
3938 #endif  
3939 
3940         /*
3941          *      Now deal with all cases.
3942          */
3943          
3944         switch(sk->state) 
3945         {
3946         
3947                 /*
3948                  * This should close the system down if it's waiting
3949                  * for an ack that is never going to be sent.
3950                  */
3951                 case TCP_LAST_ACK:
3952                         if (th->rst) 
3953                         {
3954                                 sk->zapped=1;
3955                                 sk->err = ECONNRESET;
3956                                 tcp_set_state(sk,TCP_CLOSE);
3957                                 sk->shutdown = SHUTDOWN_MASK;
3958                                 if (!sk->dead) 
3959                                 {
3960                                         sk->state_change(sk);
3961                                 }
3962                                 kfree_skb(skb, FREE_READ);
3963                                 release_sock(sk);
3964                                 return(0);
3965                         }
3966 
3967                 case TCP_ESTABLISHED:
3968                 case TCP_CLOSE_WAIT:
3969                 case TCP_CLOSING:
3970                 case TCP_FIN_WAIT1:
3971                 case TCP_FIN_WAIT2:
3972                 case TCP_TIME_WAIT:
3973                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3974                         {
3975                                 kfree_skb(skb, FREE_READ);
3976                                 release_sock(sk);
3977                                 return(0);
3978                         }
3979 
3980                         if (th->rst) 
3981                         {
3982                                 tcp_statistics.TcpEstabResets++;
3983                                 sk->zapped=1;
3984                                 /* This means the thing should really be closed. */
3985                                 sk->err = ECONNRESET;
3986                                 if (sk->state == TCP_CLOSE_WAIT) 
3987                                 {
3988                                         sk->err = EPIPE;
3989                                 }
3990         
3991                                 /*
3992                                  * A reset with a fin just means that
3993                                  * the data was not all read.
3994                                  */
3995                                 tcp_set_state(sk,TCP_CLOSE);
3996                                 sk->shutdown = SHUTDOWN_MASK;
3997                                 if (!sk->dead) 
3998                                 {
3999                                         sk->state_change(sk);
4000                                 }
4001                                 kfree_skb(skb, FREE_READ);
4002                                 release_sock(sk);
4003                                 return(0);
4004                         }
4005                         if (th->syn) 
4006                         {
4007                                 tcp_statistics.TcpEstabResets++;
4008                                 sk->err = ECONNRESET;
4009                                 tcp_set_state(sk,TCP_CLOSE);
4010                                 sk->shutdown = SHUTDOWN_MASK;
4011                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4012                                 if (!sk->dead) {
4013                                         sk->state_change(sk);
4014                                 }
4015                                 kfree_skb(skb, FREE_READ);
4016                                 release_sock(sk);
4017                                 return(0);
4018                         }
4019         
4020                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4021                                 kfree_skb(skb, FREE_READ);
4022                                 release_sock(sk);
4023                                 return(0);
4024                         }
4025         
4026                         if (tcp_urg(sk, th, saddr, len)) {
4027                                 kfree_skb(skb, FREE_READ);
4028                                 release_sock(sk);
4029                                 return(0);
4030                         }
4031 
4032         
4033                         if (tcp_data(skb, sk, saddr, len)) {
4034                                 kfree_skb(skb, FREE_READ);
4035                                 release_sock(sk);
4036                                 return(0);
4037                         }       
4038 
4039                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4040                                 kfree_skb(skb, FREE_READ);
4041                                 release_sock(sk);
4042                                 return(0);
4043                         }
4044         
4045                         release_sock(sk);
4046                         return(0);
4047                 
4048                 case TCP_CLOSE:
4049                         if (sk->dead || sk->daddr) {
4050                                 kfree_skb(skb, FREE_READ);
4051                                         release_sock(sk);
4052                                 return(0);
4053                         }
4054         
4055                         if (!th->rst) {
4056                                 if (!th->ack)
4057                                         th->ack_seq = 0;
4058                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4059                         }
4060                         kfree_skb(skb, FREE_READ);
4061                         release_sock(sk);
4062                                 return(0);
4063         
4064                 case TCP_LISTEN:
4065                         if (th->rst) {
4066                                 kfree_skb(skb, FREE_READ);
4067                                 release_sock(sk);
4068                                 return(0);
4069                         }
4070                         if (th->ack) {
4071                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4072                                 kfree_skb(skb, FREE_READ);
4073                                 release_sock(sk);
4074                                 return(0);
4075                         }
4076         
4077                         if (th->syn) 
4078                         {
4079                                 /*
4080                                  * Now we just put the whole thing including
4081                                  * the header and saddr, and protocol pointer
4082                                  * into the buffer.  We can't respond until the
4083                                  * user tells us to accept the connection.
4084                                  */
4085                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4086                                 release_sock(sk);
4087                                 return(0);
4088                         }
4089 
4090                         kfree_skb(skb, FREE_READ);
4091                         release_sock(sk);
4092                         return(0);
4093 
4094                 case TCP_SYN_RECV:
4095                         if (th->syn) {
4096                                 /* Probably a retransmitted syn */
4097                                 kfree_skb(skb, FREE_READ);
4098                                 release_sock(sk);
4099                                 return(0);
4100                         }
4101         
4102         
4103                 default:
4104                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4105                         {
4106                                 kfree_skb(skb, FREE_READ);
4107                                 release_sock(sk);
4108                                 return(0);
4109                         }
4110         
4111                 case TCP_SYN_SENT:
4112                         if (th->rst) 
4113                         {
4114                                 tcp_statistics.TcpAttemptFails++;
4115                                 sk->err = ECONNREFUSED;
4116                                 tcp_set_state(sk,TCP_CLOSE);
4117                                 sk->shutdown = SHUTDOWN_MASK;
4118                                 sk->zapped = 1;
4119                                 if (!sk->dead) 
4120                                 {
4121                                         sk->state_change(sk);
4122                                 }
4123                                 kfree_skb(skb, FREE_READ);
4124                                 release_sock(sk);
4125                                 return(0);
4126                         }
4127                         if (!th->ack) 
4128                         {
4129                                 if (th->syn) 
4130                                 {
4131                                         tcp_set_state(sk,TCP_SYN_RECV);
4132                                 }
4133                                 kfree_skb(skb, FREE_READ);
4134                                 release_sock(sk);
4135                                 return(0);
4136                         }
4137         
4138                         switch(sk->state) 
4139                         {
4140                                 case TCP_SYN_SENT:
4141                                         if (!tcp_ack(sk, th, saddr, len)) 
4142                                         {
4143                                                 tcp_statistics.TcpAttemptFails++;
4144                                                 tcp_reset(daddr, saddr, th,
4145                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4146                                                 kfree_skb(skb, FREE_READ);
4147                                                         release_sock(sk);
4148                                                 return(0);
4149                                         }
4150         
4151                                         /*
4152                                          * If the syn bit is also set, switch to
4153                                          * tcp_syn_recv, and then to established.
4154                                          */
4155                                         if (!th->syn) 
4156                                         {
4157                                                 kfree_skb(skb, FREE_READ);
4158                                                 release_sock(sk);
4159                                                 return(0);
4160                                         }
4161         
4162                                         /* Ack the syn and fall through. */
4163                                         sk->acked_seq = th->seq+1;
4164                                         sk->fin_seq = th->seq;
4165                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4166                                                 sk, th, sk->daddr);
4167                 
4168                                 case TCP_SYN_RECV:
4169                                         if (!tcp_ack(sk, th, saddr, len)) 
4170                                         {
4171                                                 tcp_statistics.TcpAttemptFails++;
4172                                                 tcp_reset(daddr, saddr, th,
4173                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4174                                                 kfree_skb(skb, FREE_READ);
4175                                                 release_sock(sk);
4176                                                 return(0);
4177                                         }
4178         
4179                                         tcp_set_state(sk,TCP_ESTABLISHED);
4180         
4181                                         /*
4182                                          *      Now we need to finish filling out
4183                                          *      some of the tcp header.
4184                                          * 
4185                                          *      We need to check for mtu info. 
4186                                          */
4187                                         tcp_options(sk, th);
4188                                         sk->dummy_th.dest = th->source;
4189                                         sk->copied_seq = sk->acked_seq-1;
4190                                         if (!sk->dead) 
4191                                         {
4192                                                 sk->state_change(sk);
4193                                         }
4194         
4195                                         /*
4196                                          * We've already processed his first
4197                                          * ack.  In just about all cases that
4198                                          * will have set max_window.  This is
4199                                          * to protect us against the possibility
4200                                          * that the initial window he sent was 0.
4201                                          * This must occur after tcp_options, which
4202                                          * sets sk->mtu.
4203                                          */
4204                                         if (sk->max_window == 0) 
4205                                         {
4206                                                 sk->max_window = 32;
4207                                                 sk->mss = min(sk->max_window, sk->mtu);
4208                                         }
4209 
4210                                         /*
4211                                          * Now process the rest like we were
4212                                          * already in the established state.
4213                                          */
4214                                         if (th->urg) 
4215                                         {
4216                                                 if (tcp_urg(sk, th, saddr, len)) 
4217                                                 { 
4218                                                         kfree_skb(skb, FREE_READ);
4219                                                         release_sock(sk);
4220                                                         return(0);
4221                                                 }
4222                                         }
4223                                         if (tcp_data(skb, sk, saddr, len))
4224                                                 kfree_skb(skb, FREE_READ);
4225 
4226                                         if (th->fin)
4227                                                 tcp_fin(skb, sk, th, saddr, dev);
4228                                         release_sock(sk);
4229                                         return(0);
4230                         }
4231         
4232                         if (th->urg) 
4233                         {
4234                                 if (tcp_urg(sk, th, saddr, len)) 
4235                                 {
4236                                         kfree_skb(skb, FREE_READ);
4237                                         release_sock(sk);
4238                                         return(0);
4239                                 }
4240                         }
4241                         if (tcp_data(skb, sk, saddr, len)) 
4242                         {
4243                                 kfree_skb(skb, FREE_READ);
4244                                 release_sock(sk);
4245                                 return(0);
4246                         }
4247         
4248                         if (!th->fin) 
4249                         {
4250                                 release_sock(sk);
4251                                 return(0);
4252                         }
4253                         tcp_fin(skb, sk, th, saddr, dev);
4254                         release_sock(sk);
4255                         return(0);
4256         }
4257 }
4258 
4259 
4260 /*
4261  * This routine sends a packet with an out of date sequence
4262  * number. It assumes the other end will try to ack it.
4263  */
4264 
4265 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4266 {
4267         struct sk_buff *buff;
4268         struct tcphdr *t1;
4269         struct device *dev=NULL;
4270         int tmp;
4271 
4272         if (sk->zapped)
4273                 return; /* After a valid reset we can send no more */
4274 
4275         /*
4276          * Write data can still be transmitted/retransmitted in the
4277          * following states.  If any other state is encountered, return.
4278          */
4279 
4280         if (sk->state != TCP_ESTABLISHED && 
4281             sk->state != TCP_CLOSE_WAIT &&
4282             sk->state != TCP_FIN_WAIT1 && 
4283             sk->state != TCP_LAST_ACK &&
4284             sk->state != TCP_CLOSING
4285         ) {
4286                 return;
4287         }
4288 
4289         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4290         if (buff == NULL) 
4291                 return;
4292 
4293         buff->len = sizeof(struct tcphdr);
4294         buff->free = 1;
4295         buff->sk = sk;
4296         buff->localroute = sk->localroute;
4297 
4298         t1 = (struct tcphdr *) buff->data;
4299 
4300         /* Put in the IP header and routing stuff. */
4301         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4302                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4303         if (tmp < 0) 
4304         {
4305                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4306                 return;
4307         }
4308 
4309         buff->len += tmp;
4310         t1 = (struct tcphdr *)((char *)t1 +tmp);
4311 
4312         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4313 
4314         /*
4315          * Use a previous sequence.
4316          * This should cause the other end to send an ack.
4317          */
4318         t1->seq = htonl(sk->sent_seq-1);
4319         t1->ack = 1; 
4320         t1->res1= 0;
4321         t1->res2= 0;
4322         t1->rst = 0;
4323         t1->urg = 0;
4324         t1->psh = 0;
4325         t1->fin = 0;
4326         t1->syn = 0;
4327         t1->ack_seq = ntohl(sk->acked_seq);
4328         t1->window = ntohs(tcp_select_window(sk));
4329         t1->doff = sizeof(*t1)/4;
4330         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4331 
4332          /*     Send it and free it.
4333           *     This will prevent the timer from automatically being restarted.
4334           */
4335         sk->prot->queue_xmit(sk, dev, buff, 1);
4336         tcp_statistics.TcpOutSegs++;
4337 }
4338 
4339 void
4340 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4341 {
4342         if (sk->zapped)
4343                 return;         /* After a valid reset we can send no more */
4344 
4345         tcp_write_wakeup(sk);
4346 
4347         sk->backoff++;
4348         sk->rto = min(sk->rto << 1, 120*HZ);
4349         reset_timer (sk, TIME_PROBE0, sk->rto);
4350         sk->retransmits++;
4351         sk->prot->retransmits ++;
4352 }
4353 
4354 /*
4355  *      Socket option code for TCP. 
4356  */
4357   
4358 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4359 {
4360         int val,err;
4361 
4362         if(level!=SOL_TCP)
4363                 return ip_setsockopt(sk,level,optname,optval,optlen);
4364 
4365         if (optval == NULL) 
4366                 return(-EINVAL);
4367 
4368         err=verify_area(VERIFY_READ, optval, sizeof(int));
4369         if(err)
4370                 return err;
4371         
4372         val = get_fs_long((unsigned long *)optval);
4373 
4374         switch(optname)
4375         {
4376                 case TCP_MAXSEG:
4377 /*
4378  * values greater than interface MTU won't take effect.  however at
4379  * the point when this call is done we typically don't yet know
4380  * which interface is going to be used
4381  */
4382                         if(val<1||val>MAX_WINDOW)
4383                                 return -EINVAL;
4384                         sk->user_mss=val;
4385                         return 0;
4386                 case TCP_NODELAY:
4387                         sk->nonagle=(val==0)?0:1;
4388                         return 0;
4389                 default:
4390                         return(-ENOPROTOOPT);
4391         }
4392 }
4393 
4394 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4395 {
4396         int val,err;
4397 
4398         if(level!=SOL_TCP)
4399                 return ip_getsockopt(sk,level,optname,optval,optlen);
4400                         
4401         switch(optname)
4402         {
4403                 case TCP_MAXSEG:
4404                         val=sk->user_mss;
4405                         break;
4406                 case TCP_NODELAY:
4407                         val=sk->nonagle;        /* Until Johannes stuff is in */
4408                         break;
4409                 default:
4410                         return(-ENOPROTOOPT);
4411         }
4412         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4413         if(err)
4414                 return err;
4415         put_fs_long(sizeof(int),(unsigned long *) optlen);
4416 
4417         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4418         if(err)
4419                 return err;
4420         put_fs_long(val,(unsigned long *)optval);
4421 
4422         return(0);
4423 }       
4424 
4425 
4426 struct proto tcp_prot = {	/* TCP's protocol table. Positional initializer: entry order must match the member order of struct proto (declared elsewhere). */
4427         sock_wmalloc,	/* presumably the wmalloc hook (called as sk->prot->wmalloc in tcp_write_wakeup) -- confirm against struct proto */
4428         sock_rmalloc,
4429         sock_wfree,	/* presumably the wfree hook (called as sk->prot->wfree in tcp_write_wakeup) -- confirm */
4430         sock_rfree,
4431         sock_rspace,
4432         sock_wspace,
4433         tcp_close,
4434         tcp_read,
4435         tcp_write,
4436         tcp_sendto,
4437         tcp_recvfrom,
4438         ip_build_header,	/* presumably the build_header hook (called as sk->prot->build_header) -- confirm */
4439         tcp_connect,
4440         tcp_accept,
4441         ip_queue_xmit,	/* presumably the queue_xmit hook (called as sk->prot->queue_xmit) -- confirm */
4442         tcp_retransmit,
4443         tcp_write_wakeup,
4444         tcp_read_wakeup,
4445         tcp_rcv,
4446         tcp_select,
4447         tcp_ioctl,
4448         NULL,	/* no handler supplied for this slot -- NOTE(review): check struct proto for which operation this is */
4449         tcp_shutdown,
4450         tcp_setsockopt,
4451         tcp_getsockopt,
4452         128,	/* NOTE(review): numeric member, meaning not visible here (presumably a maximum header size) -- confirm */
4453         0,	/* presumably the initial value of the retransmits counter that tcp_send_probe0 increments -- confirm */
4454         {NULL,},	/* aggregate member initialised with a NULL first element; remaining elements are zero-initialised */
4455         "TCP"	/* protocol name string */
4456 };

/* [previous][next][first][last][top][bottom][index][help] */