root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_close_pending
  6. tcp_dequeue_established
  7. tcp_time_wait
  8. tcp_retransmit
  9. tcp_err
  10. tcp_readable
  11. tcp_select
  12. tcp_ioctl
  13. tcp_check
  14. tcp_send_check
  15. tcp_send_skb
  16. tcp_dequeue_partial
  17. tcp_send_partial
  18. tcp_enqueue_partial
  19. tcp_send_ack
  20. tcp_build_header
  21. tcp_write
  22. tcp_sendto
  23. tcp_read_wakeup
  24. cleanup_rbuf
  25. tcp_read_urg
  26. tcp_read
  27. tcp_shutdown
  28. tcp_recvfrom
  29. tcp_reset
  30. tcp_options
  31. default_mask
  32. tcp_init_seq
  33. tcp_conn_request
  34. tcp_close
  35. tcp_write_xmit
  36. tcp_ack
  37. tcp_data
  38. tcp_check_urg
  39. tcp_urg
  40. tcp_fin
  41. tcp_accept
  42. tcp_connect
  43. tcp_sequence
  44. tcp_clean_end
  45. tcp_rcv
  46. tcp_write_wakeup
  47. tcp_send_probe0
  48. tcp_setsockopt
  49. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
   27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
   83  *              Matt Dillon     :       Yet more small nasties removed from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *              Alan Cox        :       Reset on closedown bug.
  87  *      Peter De Schrijver      :       ENOTCONN check missing in tcp_sendto().
  88  *              Michael Pall    :       Handle select() after URG properly in all cases.
  89  *              Michael Pall    :       Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin).
  90  *              Michael Pall    :       Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now.
  91  *              Michael Pall    :       recv(...,MSG_OOB) never blocks in the BSD api.
  92  *              Alan Cox        :       Changed the semantics of sk->socket to 
  93  *                                      fix a race and a signal problem with
  94  *                                      accept() and async I/O.
  95  *              Alan Cox        :       Relaxed the rules on tcp_sendto().
  96  *              Yury Shevchuk   :       Really fixed accept() blocking problem.
  97  *              Craig I. Hagan  :       Allow for BSD compatible TIME_WAIT for
  98  *                                      clients/servers which listen in on
  99  *                                      fixed ports.
 100  *              Alan Cox        :       Cleaned the above up and shrank it to
 101  *                                      a sensible code size.
 102  *              Alan Cox        :       Self connect lockup fix.
 103  *              Alan Cox        :       No connect to multicast.
 104  *              Ross Biro       :       Close unaccepted children on master
 105  *                                      socket close.
 106  *              Alan Cox        :       Reset tracing code.
 107  *              Alan Cox        :       Spurious resets on shutdown.
 108  *
 109  *
 110  * To Fix:
 111  *                      Fast path the code. Two things here - fix the window calculation
 112  *              so it doesn't iterate over the queue, also spot packets with no funny
 113  *              options arriving in order and process directly.
 114  *
 115  *              This program is free software; you can redistribute it and/or
 116  *              modify it under the terms of the GNU General Public License
 117  *              as published by the Free Software Foundation; either version
 118  *              2 of the License, or(at your option) any later version.
 119  *
 120  * Description of States:
 121  *
 122  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 123  *
 124  *      TCP_SYN_RECV            received a connection request, sent ack,
 125  *                              waiting for final ack in three-way handshake.
 126  *
 127  *      TCP_ESTABLISHED         connection established
 128  *
 129  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 130  *                              transmission of remaining buffered data
 131  *
 132  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 133  *                              to shutdown
 134  *
 135  *      TCP_CLOSING             both sides have shutdown but we still have
 136  *                              data we have to finish sending
 137  *
 138  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 139  *                              closed, can only be entered from FIN_WAIT2
 140  *                              or CLOSING.  Required because the other end
 141  *                              may not have gotten our last ACK causing it
 142  *                              to retransmit the data packet (which we ignore)
 143  *
 144  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 145  *                              us to finish writing our data and to shutdown
 146  *                              (we have to close() to move on to LAST_ACK)
 147  *
  148  *      TCP_LAST_ACK            our side has shutdown after remote has
 149  *                              shutdown.  There may still be data in our
 150  *                              buffer that we have to finish sending
 151  *              
 152  *      TCP_CLOSE               socket is finished
 153  */
 154 #include <linux/types.h>
 155 #include <linux/sched.h>
 156 #include <linux/mm.h>
 157 #include <linux/string.h>
 158 #include <linux/socket.h>
 159 #include <linux/sockios.h>
 160 #include <linux/termios.h>
 161 #include <linux/in.h>
 162 #include <linux/fcntl.h>
 163 #include <linux/inet.h>
 164 #include <linux/netdevice.h>
 165 #include "snmp.h"
 166 #include "ip.h"
 167 #include "protocol.h"
 168 #include "icmp.h"
 169 #include "tcp.h"
 170 #include <linux/skbuff.h>
 171 #include "sock.h"
 172 #include "route.h"
 173 #include <linux/errno.h>
 174 #include <linux/timer.h>
 175 #include <asm/system.h>
 176 #include <asm/segment.h>
 177 #include <linux/mm.h>
 178 
  179 #undef TCP_FASTPATH     /* leaves the TCP_FASTPATH-only counters below compiled out */
  180 
  181 #define SEQ_TICK 3
  182 unsigned long seq_offset;
  183 struct tcp_mib  tcp_statistics;         /* TCP counters (TcpCurrEstab, TcpAttemptFails, TcpOutSegs, ...) */
  184 
  185 static void tcp_close(struct sock *sk, int timeout);    /* forward declaration: tcp_close_pending() calls it */
  186 
  187 #ifdef TCP_FASTPATH
  188 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
  189 #endif
 190 
 191 
 192 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 193 {
 194         if (a < b) 
 195                 return(a);
 196         return(b);
 197 }
 198 
 199 #undef STATE_TRACE
 200 
 201 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 202 {
 203         if(sk->state==TCP_ESTABLISHED)
 204                 tcp_statistics.TcpCurrEstab--;
 205 #ifdef STATE_TRACE
 206         if(sk->debug)
 207                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 208 #endif  
 209         sk->state=state;
 210         if(state==TCP_ESTABLISHED)
 211                 tcp_statistics.TcpCurrEstab++;
 212 }
 213 
 214 /* This routine picks a TCP windows for a socket based on
 215    the following constraints
 216    
 217    1. The window can never be shrunk once it is offered (RFC 793)
 218    2. We limit memory per socket
 219    
 220    For now we use NET2E3's heuristic of offering half the memory
 221    we have handy. All is not as bad as this seems however because
 222    of two things. Firstly we will bin packets even within the window
 223    in order to get the data we are waiting for into the memory limit.
 224    Secondly we bin common duplicate forms at receive time
 225    
 226    Better heuristics welcome
 227 */
 228    
 229 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 230 {
 231         int new_window = sk->prot->rspace(sk);
 232         
 233         if(sk->window_clamp)
 234                 new_window=min(sk->window_clamp,new_window);
 235 /*
 236  * two things are going on here.  First, we don't ever offer a
 237  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 238  * receiver side of SWS as specified in RFC1122.
 239  * Second, we always give them at least the window they
 240  * had before, in order to avoid retracting window.  This
 241  * is technically allowed, but RFC1122 advises against it and
 242  * in practice it causes trouble.
 243  */
 244         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 245                 return(sk->window);
 246         return(new_window);
 247 }
 248 
 249 /*
 250  *      Find someone to 'accept'. Must be called with
 251  *      sk->inuse=1 or cli()
 252  */ 
 253 
 254 static struct sk_buff *tcp_find_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 255 {
 256         struct sk_buff *p=skb_peek(&s->receive_queue);
 257         if(p==NULL)
 258                 return NULL;
 259         do
 260         {
 261                 if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1)
 262                         return p;
 263                 p=p->next;
 264         }
 265         while(p!=skb_peek(&s->receive_queue));
 266         return NULL;
 267 }
 268 
 269 
 270 /* 
 271  *      This routine closes sockets which have been at least partially
 272  *      opened, but not yet accepted. Currently it is only called by
 273  *      tcp_close, and timeout mirrors the value there. 
 274  */
 275 
 276 static void tcp_close_pending (struct sock *sk, int timeout) 
     /* [previous][next][first][last][top][bottom][index][help] */
 277 {
 278         unsigned long flags;
 279         struct sk_buff *p, *old_p;
 280 
 281         save_flags(flags);
 282         cli(); 
 283         p=skb_peek(&sk->receive_queue);
 284 
 285         if(p==NULL) 
 286         {
 287                 restore_flags(flags);
 288                 return;
 289         }
 290 
 291         do
 292         {
 293                 tcp_close (p->sk, timeout);
 294                 skb_unlink (p);
 295                 old_p = p;
 296                 p=p->next;
 297                 kfree_skb(old_p, FREE_READ);
 298         }
 299         while(p!=skb_peek(&sk->receive_queue));
 300 
 301         restore_flags(flags);
 302         return;
 303 }
 304 
 305 static struct sk_buff *tcp_dequeue_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 306 {
 307         struct sk_buff *skb;
 308         unsigned long flags;
 309         save_flags(flags);
 310         cli(); 
 311         skb=tcp_find_established(s);
 312         if(skb!=NULL)
 313                 skb_unlink(skb);        /* Take it off the queue */
 314         restore_flags(flags);
 315         return skb;
 316 }
 317 
 318 
 319 /*
 320  *      Enter the time wait state. 
 321  */
 322 
 323 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 324 {
 325         tcp_set_state(sk,TCP_TIME_WAIT);
 326         sk->shutdown = SHUTDOWN_MASK;
 327         if (!sk->dead)
 328                 sk->state_change(sk);
 329         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 330 }
 331 
 332 /*
 333  *      A timer event has trigger a tcp retransmit timeout. The
 334  *      socket xmit queue is ready and set up to send. Because
 335  *      the ack receive code keeps the queue straight we do
 336  *      nothing clever here.
 337  */
 338 
 339 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 340 {
 341         if (all) 
 342         {
 343                 ip_retransmit(sk, all);
 344                 return;
 345         }
 346 
 347         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 348         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 349         sk->cong_count = 0;
 350 
 351         sk->cong_window = 1;
 352 
 353         /* Do the actual retransmit. */
 354         ip_retransmit(sk, all);
 355 }
 356 
 357 
 358 /*
 359  * This routine is called by the ICMP module when it gets some
 360  * sort of error condition.  If err < 0 then the socket should
 361  * be closed and the error returned to the user.  If err > 0
 362  * it's just the icmp type << 8 | icmp code.  After adjustment
 363  * header points to the first 8 bytes of the tcp header.  We need
 364  * to find the appropriate port.
 365  */
 366 
 367 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 368         unsigned long saddr, struct inet_protocol *protocol)
 369 {
 370         struct tcphdr *th;
 371         struct sock *sk;
 372         struct iphdr *iph=(struct iphdr *)header;
 373   
 374         header+=4*iph->ihl;
 375    
 376 
 377         th =(struct tcphdr *)header;
 378         sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
 379 
 380         if (sk == NULL) 
 381                 return;
 382   
 383         if(err<0)
 384         {
 385                 sk->err = -err;
 386                 sk->error_report(sk);
 387                 return;
 388         }
 389 
 390         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 391         {
 392                 /*
 393                  * FIXME:
 394                  * For now we will just trigger a linear backoff.
 395                  * The slow start code should cause a real backoff here.
 396                  */
 397                 if (sk->cong_window > 4)
 398                         sk->cong_window--;
 399                 return;
 400         }
 401 
 402 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 403 
 404         /*
 405          * If we've already connected we will keep trying
 406          * until we time out, or the user gives up.
 407          */
 408 
 409         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 410         {
 411                 if (sk->state == TCP_SYN_SENT) 
 412                 {
 413                         tcp_statistics.TcpAttemptFails++;
 414                         tcp_set_state(sk,TCP_CLOSE);
 415                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 416                 }
 417                 sk->err = icmp_err_convert[err & 0xff].errno;           
 418         }
 419         return;
 420 }
 421 
 422 
  423 /*
  424  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
  425  *      in the received data queue (ie a frame missing that needs sending to us)
       *
       *      Returns the number of bytes counted, or 0 when sk is NULL or its
       *      receive queue is empty.  Counting starts at sk->copied_seq+1 and
       *      also stops at the first buffer with PSH set once some data has
       *      been counted.  The queue is walked between save_flags()/cli()
       *      and restore_flags().
  426  */
  427 
  428 static int tcp_readable(struct sock *sk)
      /* [previous][next][first][last][top][bottom][index][help] */
  429 {
  430         unsigned long counted;
  431         unsigned long amount;
  432         struct sk_buff *skb;
  433         int sum;
  434         unsigned long flags;
  435 
  436         if(sk && sk->debug)
  437                 printk("tcp_readable: %p - ",sk);
  438 
  439         save_flags(flags);
  440         cli();
  441         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
  442         {
  443                 restore_flags(flags);
  444                 if(sk && sk->debug) 
  445                         printk("empty\n");
  446                 return(0);
  447         }
  448   
  449         counted = sk->copied_seq+1;     /* Where we are at the moment */
  450         amount = 0;
  451   
  452         /* Do until a push or until we are out of data. */
  453         do 
  454         {
  455                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
  456                         break;
  457                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
                      /*
                       * For a SYN buffer sum is bumped by one so that counted
                       * moves on one extra; amount is dropped by one again
                       * below so that extra is not reported as data.
                       */
  458                 if (skb->h.th->syn)
  459                         sum++;
                      /* sum <= 0: nothing in this buffer lies beyond counted */
  460                 if (sum > 0) 
  461                 {                                       /* Add it up, move on */
  462                         amount += sum;
  463                         if (skb->h.th->syn) 
  464                                 amount--;
  465                         counted += sum;
  466                 }
  467                 /*
  468                  * Don't count urg data ... but do it in the right place!
  469                  * Consider: "old_data (ptr is here) URG PUSH data"
  470                  * The old code would stop at the first push because
  471                  * it counted the urg (amount==1) and then does amount--
  472                  * *after* the loop.  This means tcp_readable() always
  473                  * returned zero if any URG PUSH was in the queue, even
  474                  * though there was normal data available. If we subtract
  475                  * the urg data right here, we even get it to work for more
  476                  * than one URG PUSH skb without normal data.
  477                  * This means that select() finally works now with urg data
  478                  * in the queue.  Note that rlogin was never affected
  479                  * because it doesn't use select(); it uses two processes
  480                  * and a blocking read().  And the queue scan in tcp_read()
  481                  * was correct.  Mike <pall@rz.uni-karlsruhe.de>
  482                  */
  483                 if (skb->h.th->urg)
  484                         amount--;       /* don't count urg data */
                      /* A PSH only ends the count once something has been counted */
  485                 if (amount && skb->h.th->psh) break;
  486                 skb = skb->next;
  487         }
              /* The walk is over when skb comes back round to the queue head itself */
  488         while(skb != (struct sk_buff *)&sk->receive_queue);
  489 
  490         restore_flags(flags);
  491         if(sk->debug)
  492                 printk("got %lu bytes.\n",amount);
  493         return(amount);
  494 }
 495 
 496 
 497 /*
 498  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 499  *      listening socket has a receive queue of sockets to accept.
 500  */
 501 
 502 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 503 {
 504         sk->inuse = 1;
 505 
 506         switch(sel_type) 
 507         {
 508                 case SEL_IN:
 509                         select_wait(sk->sleep, wait);
 510                         if (skb_peek(&sk->receive_queue) != NULL) 
 511                         {
 512                                 if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) 
 513                                 {
 514                                         release_sock(sk);
 515                                         return(1);
 516                                 }
 517                         }
 518                         if (sk->err != 0)       /* Receiver error */
 519                         {
 520                                 release_sock(sk);
 521                                 return(1);
 522                         }
 523                         if (sk->shutdown & RCV_SHUTDOWN) 
 524                         {
 525                                 release_sock(sk);
 526                                 return(1);
 527                         } 
 528                         release_sock(sk);
 529                         return(0);
 530                 case SEL_OUT:
 531                         select_wait(sk->sleep, wait);
 532                         if (sk->shutdown & SEND_SHUTDOWN) 
 533                         {
 534                                 /* FIXME: should this return an error? */
 535                                 release_sock(sk);
 536                                 return(0);
 537                         }
 538 
 539                         /*
 540                          * This is now right thanks to a small fix
 541                          * by Matt Dillon.
 542                          */
 543                         
 544                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 545                         {
 546                                 release_sock(sk);
 547                                 /* This should cause connect to work ok. */
 548                                 if (sk->state == TCP_SYN_RECV ||
 549                                     sk->state == TCP_SYN_SENT) return(0);
 550                                 return(1);
 551                         }
 552                         release_sock(sk);
 553                         return(0);
 554                 case SEL_EX:
 555                         select_wait(sk->sleep,wait);
 556                         if (sk->err || sk->urg_data) 
 557                         {
 558                                 release_sock(sk);
 559                                 return(1);
 560                         }
 561                         release_sock(sk);
 562                         return(0);
 563         }
 564 
 565         release_sock(sk);
 566         return(0);
 567 }
 568 
 569 
 570 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 571 {
 572         int err;
 573         switch(cmd) 
 574         {
 575 
 576                 case TIOCINQ:
 577 #ifdef FIXME    /* FIXME: */
 578                 case FIONREAD:
 579 #endif
 580                 {
 581                         unsigned long amount;
 582 
 583                         if (sk->state == TCP_LISTEN) 
 584                                 return(-EINVAL);
 585 
 586                         sk->inuse = 1;
 587                         amount = tcp_readable(sk);
 588                         release_sock(sk);
 589                         err=verify_area(VERIFY_WRITE,(void *)arg,
 590                                                    sizeof(unsigned long));
 591                         if(err)
 592                                 return err;
 593                         put_fs_long(amount,(unsigned long *)arg);
 594                         return(0);
 595                 }
 596                 case SIOCATMARK:
 597                 {
 598                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 599 
 600                         err = verify_area(VERIFY_WRITE,(void *) arg,
 601                                                   sizeof(unsigned long));
 602                         if (err)
 603                                 return err;
 604                         put_fs_long(answ,(int *) arg);
 605                         return(0);
 606                 }
 607                 case TIOCOUTQ:
 608                 {
 609                         unsigned long amount;
 610 
 611                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 612                         amount = sk->prot->wspace(sk);
 613                         err=verify_area(VERIFY_WRITE,(void *)arg,
 614                                                    sizeof(unsigned long));
 615                         if(err)
 616                                 return err;
 617                         put_fs_long(amount,(unsigned long *)arg);
 618                         return(0);
 619                 }
 620                 default:
 621                         return(-EINVAL);
 622         }
 623 }
 624 
 625 
  626 /*
  627  *      This routine computes a TCP checksum. 
       *
       *      The len bytes starting at th are summed together with saddr,
       *      daddr and a word built from len and IPPROTO_TCP.  A saddr of 0
       *      is replaced by ip_my_addr().  The return value is the one's
       *      complement of the folded sum, masked to 16 bits.
       *
       *      The inline assembly is written for the i386 register set.
  628  */
  629  
  630 unsigned short tcp_check(struct tcphdr *th, int len,
      /* [previous][next][first][last][top][bottom][index][help] */
  631           unsigned long saddr, unsigned long daddr)
  632 {     
  633         unsigned long sum;
  634    
  635         if (saddr == 0) saddr = ip_my_addr();
  636 
  637 /*
  638  * stupid, gcc complains when I use just one __asm__ block,
  639  * something about too many reloads, but this is just two
  640  * instructions longer than what I want
  641  */
              /*
               * First block: sum = daddr + saddr +
               * ((ntohs(len) << 16) + IPPROTO_TCP*256), with the carry out
               * of each addition added back in.
               */
  642         __asm__("
  643             addl %%ecx, %%ebx
  644             adcl %%edx, %%ebx
  645             adcl $0, %%ebx
  646             "
  647         : "=b"(sum)
  648         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
  649         : "bx", "cx", "dx" );
              /*
               * Second block: add the len bytes at th to sum.  len is saved
               * in edx.  When len is at least 32, whole 32-byte chunks go
               * through the unrolled lodsl/adcl loop at 1:.  The remaining
               * whole longwords ((len & 28) / 4 of them) go through the loop
               * at 3:, followed by a trailing 16-bit word if (len & 2) and a
               * trailing byte if (len & 1).  At 6: the upper 16 bits of the
               * sum are added into the lower 16 with the carry folded in.
               */
  650         __asm__("
  651             movl %%ecx, %%edx
  652             cld
  653             cmpl $32, %%ecx
  654             jb 2f
  655             shrl $5, %%ecx
  656             clc
  657 1:          lodsl
  658             adcl %%eax, %%ebx
  659             lodsl
  660             adcl %%eax, %%ebx
  661             lodsl
  662             adcl %%eax, %%ebx
  663             lodsl
  664             adcl %%eax, %%ebx
  665             lodsl
  666             adcl %%eax, %%ebx
  667             lodsl
  668             adcl %%eax, %%ebx
  669             lodsl
  670             adcl %%eax, %%ebx
  671             lodsl
  672             adcl %%eax, %%ebx
  673             loop 1b
  674             adcl $0, %%ebx
  675             movl %%edx, %%ecx
  676 2:          andl $28, %%ecx
  677             je 4f
  678             shrl $2, %%ecx
  679             clc
  680 3:          lodsl
  681             adcl %%eax, %%ebx
  682             loop 3b
  683             adcl $0, %%ebx
  684 4:          movl $0, %%eax
  685             testw $2, %%dx
  686             je 5f
  687             lodsw
  688             addl %%eax, %%ebx
  689             adcl $0, %%ebx
  690             movw $0, %%ax
  691 5:          test $1, %%edx
  692             je 6f
  693             lodsb
  694             addl %%eax, %%ebx
  695             adcl $0, %%ebx
  696 6:          movl %%ebx, %%eax
  697             shrl $16, %%eax
  698             addw %%ax, %%bx
  699             adcw $0, %%bx
  700             "
  701         : "=b"(sum)
  702         : "0"(sum), "c"(len), "S"(th)
  703         : "ax", "bx", "cx", "dx", "si" );
  704 
  705         /* We only want the bottom 16 bits, but we never cleared the top 16. */
  706   
  707         return((~sum) & 0xffff);
  708 }
 709 
 710 
 711 
 712 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 713                 unsigned long daddr, int len, struct sock *sk)
 714 {
             /*
              * Store the TCP checksum in the header at th.
              *
              * th    - TCP header whose check field is filled in
              * saddr - source address passed on to tcp_check()
              * daddr - destination address passed on to tcp_check()
              * len   - length passed on to tcp_check()
              * sk    - not referenced by this routine
              *
              * The check field is cleared before tcp_check() is called,
              * so the old contents of the field do not enter the new sum.
              */
 715         th->check = 0;
 716         th->check = tcp_check(th, len, saddr, daddr);
 717         return;
 718 }
 719 
 720 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 721 {
             /*
              * Send one fully built TCP segment, or queue it if it cannot
              * go out yet.
              *
              * sk  - socket the segment belongs to
              * skb - buffer holding the segment; skb->h.th points at its
              *       TCP header
              *
              * Malformed buffers are reported with printk() and freed.
              * A segment is appended to sk->write_queue when it ends beyond
              * sk->window_seq, when a retransmit is in progress under the
              * TIME_WRITE timer, or when sk->packets_out has reached
              * sk->cong_window.  Otherwise the ack, window and checksum
              * fields are filled in and the buffer is passed to
              * sk->prot->queue_xmit().
              */
 722         int size;
 723         struct tcphdr * th = skb->h.th;
 724 
 725         /* length of packet (not counting length of pre-tcp headers) */
 726         size = skb->len - ((unsigned char *) th - skb->data);
 727 
 728         /* sanity check it.. */
             /*
              * The cast keeps the first comparison signed.  Against a bare
              * sizeof a negative size is converted to a large unsigned
              * value, so it was only ever caught by the second test.
              */
 729         if (size < (int) sizeof(struct tcphdr) || size > skb->len) 
 730         {
 731                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 732                         skb, skb->data, th, skb->len);
 733                 kfree_skb(skb, FREE_WRITE);
 734                 return;
 735         }
 736 
 737         /* If we have queued a header size packet.. */
 738         if (size == sizeof(struct tcphdr)) 
 739         {
 740                 /* If it's got a syn or fin it's notionally included in the size..*/
 741                 if(!th->syn && !th->fin) 
 742                 {
 743                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 744                         kfree_skb(skb,FREE_WRITE);
 745                         return;
 746                 }
 747         }
 748 
 749         tcp_statistics.TcpOutSegs++;  
 750 
             /*
              * Header sequence number plus the bytes that follow the TCP
              * header (size minus the header length, doff being counted in
              * 32-bit words).
              */
 751         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 752         if (after(skb->h.seq, sk->window_seq) ||
 753             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 754              sk->packets_out >= sk->cong_window) 
 755         {
 756                 /* checksum will be supplied by tcp_write_xmit.  So
 757                  * we shouldn't need to set it at all.  I'm being paranoid */
 758                 th->check = 0;
 759                 if (skb->next != NULL) 
 760                 {
                             /* Diagnostic now names the function that emits it. */
 761                         printk("tcp_send_skb: next != NULL\n");
 762                         skb_unlink(skb);
 763                 }
 764                 skb_queue_tail(&sk->write_queue, skb);
                     /*
                      * Start the zero-window probe timer when the head of the
                      * write queue lies beyond the window, nothing is on
                      * send_head and no ack is owed.
                      */
 765                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 766                     sk->send_head == NULL &&
 767                     sk->ack_backlog == 0)
 768                         reset_timer(sk, TIME_PROBE0, sk->rto);
 769         } 
 770         else 
 771         {
                     /* Host-to-network conversions for the header fields. */
 772                 th->ack_seq = htonl(sk->acked_seq);
 773                 th->window = htons(tcp_select_window(sk));
 774 
 775                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 776 
 777                 sk->sent_seq = sk->write_seq;
 778                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 779         }
 780 }
 781 
 782 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 783 {
             /*
              * Detach and return the partially filled segment held in
              * sk->partial, or NULL if there is none.
              *
              * When a segment is taken, sk->partial is cleared and
              * sk->partial_timer is removed.  The whole exchange runs
              * between cli() and restore_flags(), i.e. with interrupts
              * disabled and the caller's flag state put back afterwards.
              */
 784         struct sk_buff * skb;
 785         unsigned long flags;
 786 
 787         save_flags(flags);
 788         cli();
 789         skb = sk->partial;
 790         if (skb) {
 791                 sk->partial = NULL;
 792                 del_timer(&sk->partial_timer);
 793         }
 794         restore_flags(flags);
 795         return skb;
 796 }
 797 
 798 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 799 {
             /*
              * Push out whatever partial segment the socket is holding.
              *
              * Each buffer returned by tcp_dequeue_partial() is handed to
              * tcp_send_skb() until none is left.  A NULL sk is ignored.
              * tcp_enqueue_partial() also installs this routine as the
              * handler of sk->partial_timer, with the socket pointer as
              * the timer data.
              */
 800         struct sk_buff *skb;
 801 
 802         if (sk == NULL)
 803                 return;
 804         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 805                 tcp_send_skb(sk, skb);
 806 }
 807 
 808 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 809 {
             /*
              * Make skb the socket's pending partial segment.
              *
              * skb - partially filled segment to hold back
              * sk  - socket that owns it
              *
              * With interrupts disabled, any previously held segment is
              * taken out (and its timer removed), skb is stored in
              * sk->partial and sk->partial_timer is set up afresh to call
              * tcp_send_partial(sk) with an expiry value of HZ
              * (presumably one second -- confirm against the timer code).
              * The displaced segment, if any, is sent with tcp_send_skb()
              * only after the flags have been restored.
              */
 810         struct sk_buff * tmp;
 811         unsigned long flags;
 812 
 813         save_flags(flags);
 814         cli();
 815         tmp = sk->partial;
 816         if (tmp)
 817                 del_timer(&sk->partial_timer);
 818         sk->partial = skb;
 819         init_timer(&sk->partial_timer);
 820         sk->partial_timer.expires = HZ;
 821         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 822         sk->partial_timer.data = (unsigned long) sk;
 823         add_timer(&sk->partial_timer);
 824         restore_flags(flags);
 825         if (tmp)
 826                 tcp_send_skb(sk, tmp);
 827 }
 828 
 829 
 830 /*
 831  *      This routine sends an ack and also updates the window. 
      *
      *      sequence - value placed in the seq field of the ack
      *      ack      - value placed in the ack_seq field
      *      sk       - socket the ack is sent for
      *      th       - TCP header being answered; it is copied as a
      *                 template and its source and dest ports are swapped
      *      daddr    - destination address given to build_header() and to
      *                 the checksum
      *
      *      Nothing is sent when sk->zapped is set.  If no buffer can be
      *      allocated, sk->ack_backlog is incremented and, unless the
      *      TIME_WRITE timer is already selected or the state is not a
      *      connected one, the TIME_WRITE timer is started.  If
      *      build_header() fails the buffer is released and no ack goes out.
 832  */
 833  
 834 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 835              struct sock *sk,
 836              struct tcphdr *th, unsigned long daddr)
 837 {
 838         struct sk_buff *buff;
 839         struct tcphdr *t1;
 840         struct device *dev = NULL;
 841         int tmp;
 842 
 843         if(sk->zapped)
 844                 return;         /* We have been reset, we may not send again */
 845         /*
 846          * We need to grab some memory, and put together an ack,
 847          * and then put it into the queue to be sent.
 848          */
 849 
 850         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 851         if (buff == NULL) 
 852         {
 853                 /* Force it to send an ack. */
 854                 sk->ack_backlog++;
 855                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 856                 {
 857                         reset_timer(sk, TIME_WRITE, 10);
 858                 }
 859                 return;
 860         }
 861 
 862         buff->len = sizeof(struct tcphdr);
 863         buff->sk = sk;
 864         buff->localroute = sk->localroute;
 865         t1 =(struct tcphdr *) buff->data;
 866 
 867         /* Put in the IP header and routing stuff. */
 868         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 869                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 870         if (tmp < 0) 
 871         {
 872                 buff->free=1;
 873                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 874                 return;
 875         }
 876         buff->len += tmp;
             /* The TCP header starts tmp bytes into the buffer. */
 877         t1 =(struct tcphdr *)((char *)t1 +tmp);
 878 
 879         /* FIXME: */
 880         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 881 
 882         /*
 883          *      Swap the send and the receive. 
 884          */
 885          
 886         t1->dest = th->source;
 887         t1->source = th->dest;
             /* Host-to-network conversions for the values put on the wire. */
 888         t1->seq = htonl(sequence);
 889         t1->ack = 1;
 890         sk->window = tcp_select_window(sk);
 891         t1->window = htons(sk->window);
 892         t1->res1 = 0;
 893         t1->res2 = 0;
 894         t1->rst = 0;
 895         t1->urg = 0;
 896         t1->syn = 0;
 897         t1->psh = 0;
 898         t1->fin = 0;
             /*
              * This ack covers everything received so far: clear the
              * pending-ack bookkeeping and, if nothing is waiting to be
              * sent and the TIME_WRITE timer is selected, either switch to
              * the keepalive timer or drop the timer.
              */
 899         if (ack == sk->acked_seq) 
 900         {
 901                 sk->ack_backlog = 0;
 902                 sk->bytes_rcv = 0;
 903                 sk->ack_timed = 0;
 904                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 905                                   && sk->timeout == TIME_WRITE) 
 906                 {
 907                         if(sk->keepopen) {
 908                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 909                         } else {
 910                                 delete_timer(sk);
 911                         }
 912                 }
 913         }
 914         t1->ack_seq = htonl(ack);
 915         t1->doff = sizeof(*t1)/4;
 916         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 917         if (sk->debug)
 918                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 919         tcp_statistics.TcpOutSegs++;
 920         sk->prot->queue_xmit(sk, dev, buff, 1);
 921 }
 922 
 923 
 924 /* 
 925  *      This routine builds a generic TCP header. 
      *
      *      th   - where the header is written
      *      sk   - socket supplying the template (sk->dummy_th) and the
      *             sequence state
      *      push - the PSH flag is set when this is 0; tcp_write() passes
      *             the number of bytes still to be sent after this segment
      *
      *      The header acknowledges sk->acked_seq, so the pending-ack
      *      bookkeeping (ack_backlog, bytes_rcv, ack_timed) is cleared and
      *      sk->window is recomputed and advertised.
      *
      *      Returns sizeof(*th); no other value is returned.
 926  */
 927  
 928 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 929 {
 930 
 931         /* FIXME: want to get rid of this. */
 932         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 933         th->seq = htonl(sk->write_seq);
 934         th->psh =(push == 0) ? 1 : 0;
             /* Header length in 32-bit words. */
 935         th->doff = sizeof(*th)/4;
 936         th->ack = 1;
 937         th->fin = 0;
 938         sk->ack_backlog = 0;
 939         sk->bytes_rcv = 0;
 940         sk->ack_timed = 0;
 941         th->ack_seq = htonl(sk->acked_seq);
 942         sk->window = tcp_select_window(sk);
 943         th->window = htons(sk->window);
 944 
 945         return(sizeof(*th));
 946 }
 947 
 948 /*
 949  *      This routine copies from a user buffer into a socket,
 950  *      and starts the transmit system.
      *
      *      sk       - socket to write to
      *      from     - user-space source buffer (read with memcpy_fromfs)
      *      len      - number of bytes to write
      *      nonblock - when non-zero, return instead of sleeping
      *      flags    - MSG_OOB and MSG_DONTROUTE are examined here
      *
      *      Returns the number of bytes copied if any were copied.
      *      Otherwise returns a negative error: the pending -sk->err,
      *      -EPIPE, -EAGAIN, -ERESTARTSYS, or the negative result of
      *      build_header().
      *
      *      The socket is marked in use (sk->inuse = 1) on entry and
      *      released with release_sock() before every return and before
      *      every point where the routine may sleep.
 951  */
 952 
 953 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 954           int len, int nonblock, unsigned flags)
 955 {
 956         int copied = 0;
 957         int copy;
 958         int tmp;
 959         struct sk_buff *skb;
 960         struct sk_buff *send_tmp;
 961         unsigned char *buff;
 962         struct proto *prot;
 963         struct device *dev = NULL;
 964 
 965         sk->inuse=1;
 966         prot = sk->prot;
 967         while(len > 0) 
 968         {
 969                 if (sk->err) 
 970                 {                       /* Stop on an error */
 971                         release_sock(sk);
 972                         if (copied) 
 973                                 return(copied);
 974                         tmp = -sk->err;
 975                         sk->err = 0;
 976                         return(tmp);
 977                 }
 978 
 979         /*
 980          *      First thing we do is make sure that we are established. 
 981          */
 982         
 983                 if (sk->shutdown & SEND_SHUTDOWN) 
 984                 {
 985                         release_sock(sk);
                             /*
                              * NOTE(review): sk->err is written after the
                              * socket has been released, and is left set to
                              * EPIPE when some data was already copied.
                              */
 986                         sk->err = EPIPE;
 987                         if (copied) 
 988                                 return(copied);
 989                         sk->err = 0;
 990                         return(-EPIPE);
 991                 }
 992 
 993 
 994         /* 
 995          *      Wait for a connection to finish.
 996          */
 997         
 998                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 999                 {
1000                         if (sk->err) 
1001                         {
1002                                 release_sock(sk);
1003                                 if (copied) 
1004                                         return(copied);
1005                                 tmp = -sk->err;
1006                                 sk->err = 0;
1007                                 return(tmp);
1008                         }
1009 
                             /* Not connecting either: the write cannot succeed. */
1010                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
1011                         {
1012                                 release_sock(sk);
1013                                 if (copied) 
1014                                         return(copied);
1015 
1016                                 if (sk->err) 
1017                                 {
1018                                         tmp = -sk->err;
1019                                         sk->err = 0;
1020                                         return(tmp);
1021                                 }
1022 
1023                                 if (sk->keepopen) 
1024                                 {
1025                                         send_sig(SIGPIPE, current, 0);
1026                                 }
1027                                 return(-EPIPE);
1028                         }
1029 
1030                         if (nonblock || copied) 
1031                         {
1032                                 release_sock(sk);
1033                                 if (copied) 
1034                                         return(copied);
1035                                 return(-EAGAIN);
1036                         }
1037 
                             /*
                              * Release the socket, then re-test the state with
                              * interrupts off before sleeping on sk->sleep.
                              */
1038                         release_sock(sk);
1039                         cli();
1040                 
1041                         if (sk->state != TCP_ESTABLISHED &&
1042                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
1043                         {
1044                                 interruptible_sleep_on(sk->sleep);
1045                                 if (current->signal & ~current->blocked) 
1046                                 {
1047                                         sti();
1048                                         if (copied) 
1049                                                 return(copied);
1050                                         return(-ERESTARTSYS);
1051                                 }
1052                         }
1053                         sk->inuse = 1;
1054                         sti();
1055                 }
1056 
1057         /*
1058          * The following code can result in copy <= 0 if sk->mss is ever
1059          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1060          * sk->mtu is constant once SYN processing is finished.  I.e. we
1061          * had better not get here until we've seen his SYN and at least one
1062          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1063          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1064          * non-decreasing.  Note that any ioctl to set user_mss must be done
1065          * before the exchange of SYN's.  If the initial ack from the other
1066          * end has a window of 0, max_window and thus mss will both be 0.
1067          */
1068 
1069         /* 
1070          *      Now we need to check if we have a half built packet. 
1071          */
1072 
1073                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1074                 {
1075                         int hdrlen;
1076 
1077                          /* IP header + TCP header */
1078                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1079                                  + sizeof(struct tcphdr);
1080         
1081                         /* Add more stuff to the end of skb->len */
1082                         if (!(flags & MSG_OOB)) 
1083                         {
                                     /*
                                      * Room left up to one mss of payload, capped
                                      * by what the caller still has to send.
                                      * NOTE(review): min() is defined outside this
                                      * view; if it takes unsigned arguments a
                                      * negative difference would wrap to a large
                                      * value instead of reaching the check below
                                      * -- confirm.
                                      */
1084                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1085                                 /* FIXME: this is really a bug. */
1086                                 if (copy <= 0) 
1087                                 {
1088                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1089                                         copy = 0;
1090                                 }
1091           
1092                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1093                                 skb->len += copy;
1094                                 from += copy;
1095                                 copied += copy;
1096                                 len -= copy;
1097                                 sk->write_seq += copy;
1098                         }
                             /*
                              * Send now if the segment is full, if this is an
                              * out-of-band write, or if nothing is in flight;
                              * otherwise keep it as the partial segment.
                              */
1099                         if ((skb->len - hdrlen) >= sk->mss ||
1100                                 (flags & MSG_OOB) || !sk->packets_out)
1101                                 tcp_send_skb(sk, skb);
1102                         else
1103                                 tcp_enqueue_partial(skb, sk);
1104                         continue;
1105                 }
1106 
1107         /*
1108          * We also need to worry about the window.
1109          * If window < 1/2 the maximum window we've seen from this
1110          *   host, don't use it.  This is sender side
1111          *   silly window prevention, as specified in RFC1122.
1112          *   (Note that this is different than earlier versions of
1113          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1114          *   use the whole MSS.  Since this results in the right
1115          *   edge of the packet being outside the window, it will
1116          *   be queued for later rather than sent.
1117          */
1118 
1119                 copy = sk->window_seq - sk->write_seq;
1120                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1121                         copy = sk->mss;
1122                 if (copy > len)
1123                         copy = len;
1124 
1125         /*
1126          *      We should really check the window here also. 
1127          */
1128          
                     /*
                      * A short, non-urgent segment gets a buffer large enough to
                      * be topped up later; send_tmp remembers it as a candidate
                      * for the partial-segment slot.
                      */
1129                 send_tmp = NULL;
1130                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1131                 {
1132                         /*
1133                          *      We will release the socket in case we sleep here. 
1134                          */
1135                         release_sock(sk);
1136                         /*
1137                          *      NB: following must be mtu, because mss can be increased.
1138                          *      mss is always <= mtu 
1139                          */
1140                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1141                         sk->inuse = 1;
1142                         send_tmp = skb;
1143                 } 
1144                 else 
1145                 {
1146                         /*
1147                          *      We will release the socket in case we sleep here. 
1148                          */
1149                         release_sock(sk);
1150                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1151                         sk->inuse = 1;
1152                 }
1153 
1154                 /*
1155                  *      If we didn't get any memory, we need to sleep. 
1156                  */
1157 
1158                 if (skb == NULL) 
1159                 {
1160                         if (nonblock) 
1161                         {
1162                                 release_sock(sk);
1163                                 if (copied) 
1164                                         return(copied);
1165                                 return(-EAGAIN);
1166                         }
1167 
1168                         /*
1169                          *      FIXME: here is another race condition. 
1170                          */
1171 
                             /*
                              * Remember the write memory in use so the sleep is
                              * skipped if some was freed while the socket was
                              * being released.
                              */
1172                         tmp = sk->wmem_alloc;
1173                         release_sock(sk);
1174                         cli();
1175                         /*
1176                          *      Again we will try to avoid it. 
1177                          */
1178                         if (tmp <= sk->wmem_alloc &&
1179                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1180                                 && sk->err == 0) 
1181                         {
1182                                 interruptible_sleep_on(sk->sleep);
1183                                 if (current->signal & ~current->blocked) 
1184                                 {
1185                                         sti();
1186                                         if (copied) 
1187                                                 return(copied);
1188                                         return(-ERESTARTSYS);
1189                                 }
1190                         }
1191                         sk->inuse = 1;
1192                         sti();
1193                         continue;
1194                 }
1195 
1196                 skb->len = 0;
1197                 skb->sk = sk;
1198                 skb->free = 0;
1199                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1200         
1201                 buff = skb->data;
1202         
1203                 /*
1204                  * FIXME: we need to optimize this.
1205                  * Perhaps some hints here would be good.
1206                  */
1207                 
1208                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1209                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1210                 if (tmp < 0 ) 
1211                 {
1212                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1213                         release_sock(sk);
1214                         if (copied) 
1215                                 return(copied);
1216                         return(tmp);
1217                 }
1218                 skb->len += tmp;
1219                 skb->dev = dev;
1220                 buff += tmp;
1221                 skb->h.th =(struct tcphdr *) buff;
                     /*
                      * The third argument is the number of bytes that remain
                      * after this segment; tcp_build_header() sets PSH when it
                      * is 0.  NOTE(review): tcp_build_header() in this file
                      * always returns sizeof(struct tcphdr), so the tmp < 0
                      * branch below is not taken.
                      */
1222                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1223                 if (tmp < 0) 
1224                 {
1225                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1226                         release_sock(sk);
1227                         if (copied) 
1228                                 return(copied);
1229                         return(tmp);
1230                 }
1231 
1232                 if (flags & MSG_OOB) 
1233                 {
1234                         ((struct tcphdr *)buff)->urg = 1;
                             /*
                              * NOTE(review): this is a host-to-network store;
                              * htons() would state the intent, although both
                              * perform the same byte swap.
                              */
1235                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1236                 }
1237                 skb->len += tmp;
1238                 memcpy_fromfs(buff+tmp, from, copy);
1239 
1240                 from += copy;
1241                 copied += copy;
1242                 len -= copy;
1243                 skb->len += copy;
1244                 skb->free = 0;
1245                 sk->write_seq += copy;
1246         
                     /*
                      * Hold a short segment back as the partial segment while
                      * other packets are still in flight.
                      */
1247                 if (send_tmp != NULL && sk->packets_out) 
1248                 {
1249                         tcp_enqueue_partial(send_tmp, sk);
1250                         continue;
1251                 }
1252                 tcp_send_skb(sk, skb);
1253         }
1254         sk->err = 0;
1255 
1256 /*
1257  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1258  *      interactive fast network servers. It's meant to be on and
1259  *      it really improves the throughput though not the echo time
1260  *      on my slow slip link - Alan
1261  */
1262 
1263 /*
1264  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1265  */
1266  
1267         if(sk->partial && ((!sk->packets_out) 
1268      /* If not nagling we can send on the before case too.. */
1269               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1270         ))
1271                 tcp_send_partial(sk);
1272 
1273         release_sock(sk);
1274         return(copied);
1275 }
1276 
1277 
1278 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1279            int len, int nonblock, unsigned flags,
1280            struct sockaddr_in *addr, int addr_len)
1281 {
             /*
              * sendto() on a TCP socket: validate the supplied address
              * against the connection, then write the data.
              *
              * sk, from, len, nonblock, flags - passed on to tcp_write()
              * addr     - destination given by the caller
              * addr_len - length of *addr as given by the caller
              *
              * Returns -EINVAL for flags other than MSG_OOB and
              * MSG_DONTROUTE, for a short address or for a non-zero family
              * other than AF_INET; -ENOTCONN when the socket is in
              * TCP_CLOSE; -EISCONN when the port or address differs from
              * the connected peer; otherwise the result of tcp_write().
              */
1282         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1283                 return -EINVAL;
1284         if (sk->state == TCP_CLOSE)
1285                 return -ENOTCONN;
             /*
              * Compare as signed.  Against a bare sizeof a negative addr_len
              * is converted to a large unsigned value, passes the test, and
              * addr is then dereferenced below.
              */
1286         if (addr_len < (int) sizeof(*addr))
1287                 return -EINVAL;
1288         if (addr->sin_family && addr->sin_family != AF_INET) 
1289                 return -EINVAL;
1290         if (addr->sin_port != sk->dummy_th.dest) 
1291                 return -EISCONN;
1292         if (addr->sin_addr.s_addr != sk->daddr) 
1293                 return -EISCONN;
1294         return tcp_write(sk, from, len, nonblock, flags);
1295 }
1297 
1298 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1299 {
             /*
              * Send a bare ack carrying the current window, if one is owed.
              *
              * Nothing is done when sk->ack_backlog is 0.  If no buffer can
              * be allocated the TIME_WRITE timer is started so that the ack
              * is retried; if build_header() fails the buffer is released
              * and no ack goes out.  The header is copied from
              * sk->dummy_th, carries sk->sent_seq and sk->acked_seq, and
              * advertises a freshly selected window.
              */
1300         int tmp;
1301         struct device *dev = NULL;
1302         struct tcphdr *t1;
1303         struct sk_buff *buff;
1304 
1305         if (!sk->ack_backlog) 
1306                 return;
1307 
1308         /*
1309          * FIXME: we need to put code here to prevent this routine from
1310          * being called.  Being called once in a while is ok, so only check
1311          * if this is the second time in a row.
1312          */
1313 
1314         /*
1315          * We need to grab some memory, and put together an ack,
1316          * and then put it into the queue to be sent.
1317          */
1318 
1319         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1320         if (buff == NULL) 
1321         {
1322                 /* Try again real soon. */
1323                 reset_timer(sk, TIME_WRITE, 10);
1324                 return;
1325         }
1326 
1327         buff->len = sizeof(struct tcphdr);
1328         buff->sk = sk;
1329         buff->localroute = sk->localroute;
1330         
1331         /*
1332          *      Put in the IP header and routing stuff. 
1333          */
1334 
1335         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1336                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1337         if (tmp < 0) 
1338         {
1339                 buff->free=1;
1340                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1341                 return;
1342         }
1343 
1344         buff->len += tmp;
             /* The TCP header starts tmp bytes into the buffer. */
1345         t1 =(struct tcphdr *)(buff->data +tmp);
1346 
1347         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1348         t1->seq = htonl(sk->sent_seq);
1349         t1->ack = 1;
1350         t1->res1 = 0;
1351         t1->res2 = 0;
1352         t1->rst = 0;
1353         t1->urg = 0;
1354         t1->syn = 0;
1355         t1->psh = 0;
1356         sk->ack_backlog = 0;
1357         sk->bytes_rcv = 0;
1358         sk->window = tcp_select_window(sk);
             /* Host-to-network conversions, matching the seq store above. */
1359         t1->window = htons(sk->window);
1360         t1->ack_seq = htonl(sk->acked_seq);
1361         t1->doff = sizeof(*t1)/4;
1362         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1363         sk->prot->queue_xmit(sk, dev, buff, 1);
1364         tcp_statistics.TcpOutSegs++;
1365 }
1366 
1367 
1368 /*
1369  *      FIXME:
1370  *      This routine frees used buffers.
1371  *      It should consider sending an ACK to let the
1372  *      other end know we now have a bigger window.
      *
      *      Buffers at the head of sk->receive_queue whose used flag is
      *      set are unlinked and freed with interrupts disabled; the scan
      *      stops at the first buffer that is not marked used.  If the
      *      receive space reported by sk->prot->rspace() changed as a
      *      result, an ack is either sent at once or scheduled through
      *      the socket timer.
1373  */
1374 
1375 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1376 {
1377         unsigned long flags;
1378         unsigned long left;
1379         struct sk_buff *skb;
1380         unsigned long rspace;
1381 
1382         if(sk->debug)
1383                 printk("cleaning rbuf for sk=%p\n", sk);
1384   
1385         save_flags(flags);
1386         cli();
1387   
             /* Receive space before anything is freed, for comparison below. */
1388         left = sk->prot->rspace(sk);
1389  
1390         /*
1391          * We have to loop through all the buffer headers,
1392          * and try to free up all the space we can.
1393          */
1394 
1395         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1396         {
1397                 if (!skb->used) 
1398                         break;
1399                 skb_unlink(skb);
1400                 skb->sk = sk;
1401                 kfree_skb(skb, FREE_READ);
1402         }
1403 
1404         restore_flags(flags);
1405 
1406         /*
1407          * FIXME:
1408          * At this point we should send an ack if the difference
1409          * in the window, and the amount of space is bigger than
1410          * TCP_WINDOW_DIFF.
1411          */
1412 
1413         if(sk->debug)
1414                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1415                                             left);
1416         if ((rspace=sk->prot->rspace(sk)) != left) 
1417         {
1418                 /*
1419                  * This area has caused the most trouble.  The current strategy
1420                  * is to simply do nothing if the other end has room to send at
1421                  * least 3 full packets, because the ack from those will auto-
1422                  * matically update the window.  If the other end doesn't think
1423                  * we have much space left, but we have room for at least 1 more
1424                  * complete packet than it thinks we do, we will send an ack
1425                  * immediately.  Otherwise we will wait up to .5 seconds in case
1426                  * the user reads some more.
1427                  */
1428                 sk->ack_backlog++;
1429         /*
1430          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1431          * if the other end is offering a window smaller than the agreed on MSS
1432          * (called sk->mtu here).  In theory there's no connection between send
1433          * and receive, and so no reason to think that they're going to send
1434          * small packets.  For the moment I'm using the hack of reducing the mss
1435          * only on the send side, so I'm putting mtu here.
1436          */
1437 
                     /*
                      * sk->window - sk->bytes_rcv is presumably what the peer
                      * still believes it may send -- confirm; ack at once when
                      * the real space exceeds that by more than sk->mtu.
                      */
1438                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1439                 {
1440                         /* Send an ack right now. */
1441                         tcp_read_wakeup(sk);
1442                 } 
1443                 else 
1444                 {
1445                         /* Force it to send an ack soon. */
                             /*
                              * Take the socket timer off its list.  If it was
                              * not pending, or its expiry value is larger than
                              * TCP_ACK_TIME, start a TIME_WRITE timer of
                              * TCP_ACK_TIME; otherwise put the timer back
                              * unchanged.
                              */
1446                         int was_active = del_timer(&sk->timer);
1447                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1448                         {
1449                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1450                         } 
1451                         else
1452                                 add_timer(&sk->timer);
1453                 }
1454         }
1455 } 
1456 
1457 
1458 /*
1459  *      Handle reading urgent data. 
      *
      *      sk       - socket to read from
      *      nonblock - not referenced by this routine
      *      to       - user-space destination for the urgent byte
      *      len      - not referenced by this routine
      *      flags    - MSG_PEEK leaves the urgent byte unread
      *
      *      Returns 1 when an urgent byte was stored at to, 0 at end of
      *      stream, or a negative error: -EINVAL when urgent data is
      *      delivered inline, absent or already read; the pending -sk->err;
      *      -ENOTCONN once end of stream has already been reported;
      *      -EAGAIN when no valid urgent byte is available yet.
1460  */
1461  
1462 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1463              unsigned char *to, int len, unsigned flags)
1464 {
1465         if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1466                 return -EINVAL;
1467         if (sk->err) 
1468         {
1469                 int tmp = -sk->err;
1470                 sk->err = 0;
1471                 return tmp;
1472         }
1473 
             /* The first call on a closed socket reports end of stream (0). */
1474         if (sk->state == TCP_CLOSE || sk->done) 
1475         {
1476                 if (!sk->done) {
1477                         sk->done = 1;
1478                         return 0;
1479                 }
1480                 return -ENOTCONN;
1481         }
1482 
1483         if (sk->shutdown & RCV_SHUTDOWN) 
1484         {
1485                 sk->done = 1;
1486                 return 0;
1487         }
1488         sk->inuse = 1;
1489         if (sk->urg_data & URG_VALID) 
1490         {
                     /*
                      * Converting to char keeps only the low-order part of
                      * urg_data, presumably the urgent byte itself with the
                      * flag bits dropped -- confirm against the URG_*
                      * definitions.
                      */
1491                 char c = sk->urg_data;
1492                 if (!(flags & MSG_PEEK))
1493                         sk->urg_data = URG_READ;
1494                 put_fs_byte(c, to);
1495                 release_sock(sk);
1496                 return 1;
1497         }
1498         release_sock(sk);
1499         
1500         /*
1501          * Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
1502          * the available implementations agree in this case:
1503          * this call should never block, independent of the
1504          * blocking state of the socket.
1505          * Mike <pall@rz.uni-karlsruhe.de>
1506          */
1507         return -EAGAIN;
1508 }
1509 
1510 
1511 /*
1512  *      This routine copies from a sock struct into the user buffer. 
1513  */
1514  
1515 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1516         int len, int nonblock, unsigned flags)
1517 {
1518         struct wait_queue wait = { current, NULL };
1519         int copied = 0;
1520         unsigned long peek_seq;
1521         unsigned long *seq;
1522         unsigned long used;
1523 
1524         /* This error should be checked. */
1525         if (sk->state == TCP_LISTEN)
1526                 return -ENOTCONN;
1527 
1528         /* Urgent data needs to be handled specially. */
1529         if (flags & MSG_OOB)
1530                 return tcp_read_urg(sk, nonblock, to, len, flags);
1531 
1532         peek_seq = sk->copied_seq;
1533         seq = &sk->copied_seq;
1534         if (flags & MSG_PEEK)
1535                 seq = &peek_seq;
1536 
1537         add_wait_queue(sk->sleep, &wait);
1538         sk->inuse = 1;
1539         while (len > 0) 
1540         {
1541                 struct sk_buff * skb;
1542                 unsigned long offset;
1543         
1544                 /*
1545                  * are we at urgent data? Stop if we have read anything.
1546                  */
1547                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1548                         break;
1549 
1550                 current->state = TASK_INTERRUPTIBLE;
1551 
1552                 skb = skb_peek(&sk->receive_queue);
1553                 do 
1554                 {
1555                         if (!skb)
1556                                 break;
1557                         if (before(1+*seq, skb->h.th->seq))
1558                                 break;
1559                         offset = 1 + *seq - skb->h.th->seq;
1560                         if (skb->h.th->syn)
1561                                 offset--;
1562                         if (offset < skb->len)
1563                                 goto found_ok_skb;
1564                         if (!(flags & MSG_PEEK))
1565                                 skb->used = 1;
1566                         skb = skb->next;
1567                 }
1568                 while (skb != (struct sk_buff *)&sk->receive_queue);
1569 
1570                 if (copied)
1571                         break;
1572 
1573                 if (sk->err) 
1574                 {
1575                         copied = -sk->err;
1576                         sk->err = 0;
1577                         break;
1578                 }
1579 
1580                 if (sk->state == TCP_CLOSE) 
1581                 {
1582                         if (!sk->done) 
1583                         {
1584                                 sk->done = 1;
1585                                 break;
1586                         }
1587                         copied = -ENOTCONN;
1588                         break;
1589                 }
1590 
1591                 if (sk->shutdown & RCV_SHUTDOWN) 
1592                 {
1593                         sk->done = 1;
1594                         break;
1595                 }
1596                         
1597                 if (nonblock) 
1598                 {
1599                         copied = -EAGAIN;
1600                         break;
1601                 }
1602 
1603                 cleanup_rbuf(sk);
1604                 release_sock(sk);
1605                 schedule();
1606                 sk->inuse = 1;
1607 
1608                 if (current->signal & ~current->blocked) 
1609                 {
1610                         copied = -ERESTARTSYS;
1611                         break;
1612                 }
1613                 continue;
1614 
1615         found_ok_skb:
1616                 /* Ok so how much can we use ? */
1617                 used = skb->len - offset;
1618                 if (len < used)
1619                         used = len;
1620                 /* do we have urgent data here? */
1621                 if (sk->urg_data) 
1622                 {
1623                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1624                         if (urg_offset < used) 
1625                         {
1626                                 if (!urg_offset) 
1627                                 {
1628                                         if (!sk->urginline) 
1629                                         {
1630                                                 ++*seq;
1631                                                 offset++;
1632                                                 used--;
1633                                         }
1634                                 }
1635                                 else
1636                                         used = urg_offset;
1637                         }
1638                 }
1639                 /* Copy it */
1640                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1641                         skb->h.th->doff*4 + offset, used);
1642                 copied += used;
1643                 len -= used;
1644                 to += used;
1645                 *seq += used;
1646                 if (after(sk->copied_seq+1,sk->urg_seq))
1647                         sk->urg_data = 0;
1648                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1649                         skb->used = 1;
1650         }
1651         remove_wait_queue(sk->sleep, &wait);
1652         current->state = TASK_RUNNING;
1653 
1654         /* Clean up data we have read: This will do ACK frames */
1655         cleanup_rbuf(sk);
1656         release_sock(sk);
1657         return copied;
1658 }
1659 
1660  
1661 /*
1662  *      Shutdown the sending side of a connection.
1663  */
1664 
1665 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1666 {
1667         struct sk_buff *buff;
1668         struct tcphdr *t1, *th;
1669         struct proto *prot;
1670         int tmp;
1671         struct device *dev = NULL;
1672 
1673         /*
1674          * We need to grab some memory, and put together a FIN,
1675          * and then put it into the queue to be sent.
1676          * FIXME:
1677          *
1678          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1679          *      Most of this is guesswork, so maybe it will work...
1680          */
1681 
1682         if (!(how & SEND_SHUTDOWN)) 
1683                 return;
1684          
1685         /*
1686          *      If we've already sent a FIN, return. 
1687          */
1688          
1689         if (sk->state == TCP_FIN_WAIT1 ||
1690             sk->state == TCP_FIN_WAIT2 ||
1691             sk->state == TCP_CLOSING ||
1692             sk->state == TCP_LAST_ACK ||
1693             sk->state == TCP_TIME_WAIT
1694         ) 
1695         {
1696                 return;
1697         }
1698         sk->inuse = 1;
1699 
1700         /*
1701          * flag that the sender has shutdown
1702          */
1703 
1704         sk->shutdown |= SEND_SHUTDOWN;
1705 
1706         /*
1707          *  Clear out any half completed packets. 
1708          */
1709 
1710         if (sk->partial)
1711                 tcp_send_partial(sk);
1712 
1713         prot =(struct proto *)sk->prot;
1714         th =(struct tcphdr *)&sk->dummy_th;
1715         release_sock(sk); /* incase the malloc sleeps. */
1716         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1717         if (buff == NULL)
1718                 return;
1719         sk->inuse = 1;
1720 
1721         buff->sk = sk;
1722         buff->len = sizeof(*t1);
1723         buff->localroute = sk->localroute;
1724         t1 =(struct tcphdr *) buff->data;
1725 
1726         /*
1727          *      Put in the IP header and routing stuff. 
1728          */
1729 
1730         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1731                            IPPROTO_TCP, sk->opt,
1732                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1733         if (tmp < 0) 
1734         {
1735                 /*
1736                  *      Finish anyway, treat this as a send that got lost. 
1737                  *
1738                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1739                  *      written data to be completely acknowledged along
1740                  *      with an acknowledge to our FIN.
1741                  *
1742                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1743                  *      connection established.
1744                  */
1745                 buff->free=1;
1746                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1747 
1748                 if (sk->state == TCP_ESTABLISHED)
1749                         tcp_set_state(sk,TCP_FIN_WAIT1);
1750                 else if(sk->state == TCP_CLOSE_WAIT)
1751                         tcp_set_state(sk,TCP_LAST_ACK);
1752                 else
1753                         tcp_set_state(sk,TCP_FIN_WAIT2);
1754 
1755                 release_sock(sk);
1756                 return;
1757         }
1758 
1759         t1 =(struct tcphdr *)((char *)t1 +tmp);
1760         buff->len += tmp;
1761         buff->dev = dev;
1762         memcpy(t1, th, sizeof(*t1));
1763         t1->seq = ntohl(sk->write_seq);
1764         sk->write_seq++;
1765         buff->h.seq = sk->write_seq;
1766         t1->ack = 1;
1767         t1->ack_seq = ntohl(sk->acked_seq);
1768         t1->window = ntohs(sk->window=tcp_select_window(sk));
1769         t1->fin = 1;
1770         t1->rst = 0;
1771         t1->doff = sizeof(*t1)/4;
1772         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1773 
1774         /*
1775          * If there is data in the write queue, the fin must be appended to
1776          * the write queue.
1777          */
1778         
1779         if (skb_peek(&sk->write_queue) != NULL) 
1780         {
1781                 buff->free=0;
1782                 if (buff->next != NULL) 
1783                 {
1784                         printk("tcp_shutdown: next != NULL\n");
1785                         skb_unlink(buff);
1786                 }
1787                 skb_queue_tail(&sk->write_queue, buff);
1788         } 
1789         else 
1790         {
1791                 sk->sent_seq = sk->write_seq;
1792                 sk->prot->queue_xmit(sk, dev, buff, 0);
1793         }
1794 
1795         if (sk->state == TCP_ESTABLISHED) 
1796                 tcp_set_state(sk,TCP_FIN_WAIT1);
1797         else if (sk->state == TCP_CLOSE_WAIT)
1798                 tcp_set_state(sk,TCP_LAST_ACK);
1799         else
1800                 tcp_set_state(sk,TCP_FIN_WAIT2);
1801 
1802         release_sock(sk);
1803 }
1804 
1805 
1806 static int
1807 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1808              int to_len, int nonblock, unsigned flags,
1809              struct sockaddr_in *addr, int *addr_len)
1810 {
1811         int result;
1812   
1813         /* 
1814          *      Have to check these first unlike the old code. If 
1815          *      we check them after we lose data on an error
1816          *      which is wrong 
1817          */
1818 
1819         if(addr_len)
1820                 *addr_len = sizeof(*addr);
1821         result=tcp_read(sk, to, to_len, nonblock, flags);
1822 
1823         if (result < 0) 
1824                 return(result);
1825   
1826         if(addr)
1827         {
1828                 addr->sin_family = AF_INET;
1829                 addr->sin_port = sk->dummy_th.dest;
1830                 addr->sin_addr.s_addr = sk->daddr;
1831         }
1832         return(result);
1833 }
1834 
1835 
1836 /*
1837  *      This routine will send an RST to the other tcp. 
1838  */
1839  
1840 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1841           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1842 {
1843         struct sk_buff *buff;
1844         struct tcphdr *t1;
1845         int tmp;
1846         struct device *ndev=NULL;
1847   
1848 /*
1849  * We need to grab some memory, and put together an RST,
1850  * and then put it into the queue to be sent.
1851  */
1852 
1853         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1854         if (buff == NULL) 
1855                 return;
1856 
1857         buff->len = sizeof(*t1);
1858         buff->sk = NULL;
1859         buff->dev = dev;
1860         buff->localroute = 0;
1861 
1862         t1 =(struct tcphdr *) buff->data;
1863 
1864         /*
1865          *      Put in the IP header and routing stuff. 
1866          */
1867 
1868         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1869                            sizeof(struct tcphdr),tos,ttl);
1870         if (tmp < 0) 
1871         {
1872                 buff->free = 1;
1873                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1874                 return;
1875         }
1876 
1877         t1 =(struct tcphdr *)((char *)t1 +tmp);
1878         buff->len += tmp;
1879         memcpy(t1, th, sizeof(*t1));
1880 
1881         /*
1882          *      Swap the send and the receive. 
1883          */
1884 
1885         t1->dest = th->source;
1886         t1->source = th->dest;
1887         t1->rst = 1;  
1888         t1->window = 0;
1889   
1890         if(th->ack)
1891         {
1892                 t1->ack = 0;
1893                 t1->seq = th->ack_seq;
1894                 t1->ack_seq = 0;
1895         }
1896         else
1897         {
1898                 t1->ack = 1;
1899                 if(!th->syn)
1900                         t1->ack_seq=htonl(th->seq);
1901                 else
1902                         t1->ack_seq=htonl(th->seq+1);
1903                 t1->seq=0;
1904         }
1905 
1906         t1->syn = 0;
1907         t1->urg = 0;
1908         t1->fin = 0;
1909         t1->psh = 0;
1910         t1->doff = sizeof(*t1)/4;
1911         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1912         prot->queue_xmit(NULL, ndev, buff, 1);
1913         tcp_statistics.TcpOutSegs++;
1914 }
1915 
1916 
1917 /*
1918  *      Look for tcp options. Parses everything but only knows about MSS.
1919  *      This routine is always called with the packet containing the SYN.
1920  *      However it may also be called with the ack to the SYN.  So you
1921  *      can't assume this is always the SYN.  It's always called after
1922  *      we have set up sk->mtu to our own MTU.
1923  */
1924  
1925 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1926 {
1927         unsigned char *ptr;
1928         int length=(th->doff*4)-sizeof(struct tcphdr);
1929         int mss_seen = 0;
1930     
1931         ptr = (unsigned char *)(th + 1);
1932   
1933         while(length>0)
1934         {
1935                 int opcode=*ptr++;
1936                 int opsize=*ptr++;
1937                 switch(opcode)
1938                 {
1939                         case TCPOPT_EOL:
1940                                 return;
1941                         case TCPOPT_NOP:
1942                                 length-=2;
1943                                 continue;
1944                         
1945                         default:
1946                                 if(opsize<=2)   /* Avoid silly options looping forever */
1947                                         return;
1948                                 switch(opcode)
1949                                 {
1950                                         case TCPOPT_MSS:
1951                                                 if(opsize==4 && th->syn)
1952                                                 {
1953                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1954                                                         mss_seen = 1;
1955                                                 }
1956                                                 break;
1957                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1958                                 }
1959                                 ptr+=opsize-2;
1960                                 length-=opsize;
1961                 }
1962         }
1963         if (th->syn) 
1964         {
1965                 if (! mss_seen)
1966                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1967         }
1968 #ifdef CONFIG_INET_PCTCP
1969         sk->mss = min(sk->max_window >> 1, sk->mtu);
1970 #else    
1971         sk->mss = min(sk->max_window, sk->mtu);
1972 #endif  
1973 }
1974 
/*
 *	Classful network mask for 'dst'.  Both the argument and the result
 *	are in network byte order.  Class A and class B addresses get their
 *	own masks; every other address gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
	const unsigned long host_addr = ntohl(dst);

	if (IN_CLASSA(host_addr))
		return htonl(IN_CLASSA_NET);
	return htonl(IN_CLASSB(host_addr) ? IN_CLASSB_NET : IN_CLASSC_NET);
}
1984 
1985 /*
1986  *      Default sequence number picking algorithm.
1987  */
1988 
1989 extern inline long tcp_init_seq(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1990 {
1991         return jiffies * SEQ_TICK - seq_offset; 
1992 }
1993 
1994 /*
1995  *      This routine handles a connection request.
1996  *      It should make sure we haven't already responded.
1997  *      Because of the way BSD works, we have to send a syn/ack now.
1998  *      This also means it will be harder to close a socket which is
1999  *      listening.
2000  */
2001  
2002 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
2003                  unsigned long daddr, unsigned long saddr,
2004                  struct options *opt, struct device *dev, unsigned long seq)
2005 {
2006         struct sk_buff *buff;
2007         struct tcphdr *t1;
2008         unsigned char *ptr;
2009         struct sock *newsk;
2010         struct tcphdr *th;
2011         struct device *ndev=NULL;
2012         int tmp;
2013         struct rtable *rt;
2014   
2015         th = skb->h.th;
2016 
2017         /* If the socket is dead, don't accept the connection. */
2018         if (!sk->dead) 
2019         {
2020                 sk->data_ready(sk,0);
2021         }
2022         else 
2023         {
2024                 if(sk->debug)
2025                         printk("Reset on %p: Connect on dead socket.\n",sk);
2026                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
2027                 tcp_statistics.TcpAttemptFails++;
2028                 kfree_skb(skb, FREE_READ);
2029                 return;
2030         }
2031 
2032         /*
2033          * Make sure we can accept more.  This will prevent a
2034          * flurry of syns from eating up all our memory.
2035          */
2036 
2037         if (sk->ack_backlog >= sk->max_ack_backlog) 
2038         {
2039                 tcp_statistics.TcpAttemptFails++;
2040                 kfree_skb(skb, FREE_READ);
2041                 return;
2042         }
2043 
2044         /*
2045          * We need to build a new sock struct.
2046          * It is sort of bad to have a socket without an inode attached
2047          * to it, but the wake_up's will just wake up the listening socket,
2048          * and if the listening socket is destroyed before this is taken
2049          * off of the queue, this will take care of it.
2050          */
2051 
2052         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
2053         if (newsk == NULL) 
2054         {
2055                 /* just ignore the syn.  It will get retransmitted. */
2056                 tcp_statistics.TcpAttemptFails++;
2057                 kfree_skb(skb, FREE_READ);
2058                 return;
2059         }
2060 
2061         memcpy(newsk, sk, sizeof(*newsk));
2062         skb_queue_head_init(&newsk->write_queue);
2063         skb_queue_head_init(&newsk->receive_queue);
2064         newsk->send_head = NULL;
2065         newsk->send_tail = NULL;
2066         skb_queue_head_init(&newsk->back_log);
2067         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2068         newsk->rto = TCP_TIMEOUT_INIT;
2069         newsk->mdev = 0;
2070         newsk->max_window = 0;
2071         newsk->cong_window = 1;
2072         newsk->cong_count = 0;
2073         newsk->ssthresh = 0;
2074         newsk->backoff = 0;
2075         newsk->blog = 0;
2076         newsk->intr = 0;
2077         newsk->proc = 0;
2078         newsk->done = 0;
2079         newsk->partial = NULL;
2080         newsk->pair = NULL;
2081         newsk->wmem_alloc = 0;
2082         newsk->rmem_alloc = 0;
2083         newsk->localroute = sk->localroute;
2084 
2085         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2086 
2087         newsk->err = 0;
2088         newsk->shutdown = 0;
2089         newsk->ack_backlog = 0;
2090         newsk->acked_seq = skb->h.th->seq+1;
2091         newsk->fin_seq = skb->h.th->seq;
2092         newsk->copied_seq = skb->h.th->seq;
2093         newsk->state = TCP_SYN_RECV;
2094         newsk->timeout = 0;
2095         newsk->write_seq = seq; 
2096         newsk->window_seq = newsk->write_seq;
2097         newsk->rcv_ack_seq = newsk->write_seq;
2098         newsk->urg_data = 0;
2099         newsk->retransmits = 0;
2100         newsk->destroy = 0;
2101         init_timer(&newsk->timer);
2102         newsk->timer.data = (unsigned long)newsk;
2103         newsk->timer.function = &net_timer;
2104         newsk->dummy_th.source = skb->h.th->dest;
2105         newsk->dummy_th.dest = skb->h.th->source;
2106         
2107         /*
2108          *      Swap these two, they are from our point of view. 
2109          */
2110          
2111         newsk->daddr = saddr;
2112         newsk->saddr = daddr;
2113 
2114         put_sock(newsk->num,newsk);
2115         newsk->dummy_th.res1 = 0;
2116         newsk->dummy_th.doff = 6;
2117         newsk->dummy_th.fin = 0;
2118         newsk->dummy_th.syn = 0;
2119         newsk->dummy_th.rst = 0;        
2120         newsk->dummy_th.psh = 0;
2121         newsk->dummy_th.ack = 0;
2122         newsk->dummy_th.urg = 0;
2123         newsk->dummy_th.res2 = 0;
2124         newsk->acked_seq = skb->h.th->seq + 1;
2125         newsk->copied_seq = skb->h.th->seq;
2126         newsk->socket = NULL;
2127 
2128         /*
2129          *      Grab the ttl and tos values and use them 
2130          */
2131 
2132         newsk->ip_ttl=sk->ip_ttl;
2133         newsk->ip_tos=skb->ip_hdr->tos;
2134 
2135         /*
2136          *      Use 512 or whatever user asked for 
2137          */
2138 
2139         /*
2140          *      Note use of sk->user_mss, since user has no direct access to newsk 
2141          */
2142 
2143         rt=ip_rt_route(saddr, NULL,NULL);
2144         
2145         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2146                 newsk->window_clamp = rt->rt_window;
2147         else
2148                 newsk->window_clamp = 0;
2149                 
2150         if (sk->user_mss)
2151                 newsk->mtu = sk->user_mss;
2152         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2153                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2154         else 
2155         {
2156 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2157                 if ((saddr ^ daddr) & default_mask(saddr))
2158 #else
2159                 if ((saddr ^ daddr) & dev->pa_mask)
2160 #endif
2161                         newsk->mtu = 576 - HEADER_SIZE;
2162                 else
2163                         newsk->mtu = MAX_WINDOW;
2164         }
2165 
2166         /*
2167          *      But not bigger than device MTU 
2168          */
2169 
2170         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2171 
2172         /*
2173          *      This will min with what arrived in the packet 
2174          */
2175 
2176         tcp_options(newsk,skb->h.th);
2177 
2178         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2179         if (buff == NULL) 
2180         {
2181                 sk->err = -ENOMEM;
2182                 newsk->dead = 1;
2183                 release_sock(newsk);
2184                 kfree_skb(skb, FREE_READ);
2185                 tcp_statistics.TcpAttemptFails++;
2186                 return;
2187         }
2188   
2189         buff->len = sizeof(struct tcphdr)+4;
2190         buff->sk = newsk;
2191         buff->localroute = newsk->localroute;
2192 
2193         t1 =(struct tcphdr *) buff->data;
2194 
2195         /*
2196          *      Put in the IP header and routing stuff. 
2197          */
2198 
2199         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2200                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2201 
2202         /*
2203          *      Something went wrong. 
2204          */
2205 
2206         if (tmp < 0) 
2207         {
2208                 sk->err = tmp;
2209                 buff->free=1;
2210                 kfree_skb(buff,FREE_WRITE);
2211                 newsk->dead = 1;
2212                 release_sock(newsk);
2213                 skb->sk = sk;
2214                 kfree_skb(skb, FREE_READ);
2215                 tcp_statistics.TcpAttemptFails++;
2216                 return;
2217         }
2218 
2219         buff->len += tmp;
2220         t1 =(struct tcphdr *)((char *)t1 +tmp);
2221   
2222         memcpy(t1, skb->h.th, sizeof(*t1));
2223         buff->h.seq = newsk->write_seq;
2224         /*
2225          *      Swap the send and the receive. 
2226          */
2227         t1->dest = skb->h.th->source;
2228         t1->source = newsk->dummy_th.source;
2229         t1->seq = ntohl(newsk->write_seq++);
2230         t1->ack = 1;
2231         newsk->window = tcp_select_window(newsk);
2232         newsk->sent_seq = newsk->write_seq;
2233         t1->window = ntohs(newsk->window);
2234         t1->res1 = 0;
2235         t1->res2 = 0;
2236         t1->rst = 0;
2237         t1->urg = 0;
2238         t1->psh = 0;
2239         t1->syn = 1;
2240         t1->ack_seq = ntohl(skb->h.th->seq+1);
2241         t1->doff = sizeof(*t1)/4+1;
2242         ptr =(unsigned char *)(t1+1);
2243         ptr[0] = 2;
2244         ptr[1] = 4;
2245         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2246         ptr[3] =(newsk->mtu) & 0xff;
2247 
2248         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2249         newsk->prot->queue_xmit(newsk, ndev, buff, 0);
2250 
2251         reset_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
2252         skb->sk = newsk;
2253 
2254         /*
2255          *      Charge the sock_buff to newsk. 
2256          */
2257          
2258         sk->rmem_alloc -= skb->mem_len;
2259         newsk->rmem_alloc += skb->mem_len;
2260         
2261         skb_queue_tail(&sk->receive_queue,skb);
2262         sk->ack_backlog++;
2263         release_sock(newsk);
2264         tcp_statistics.TcpOutSegs++;
2265 }
2266 
2267 
2268 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2269 {
2270         struct sk_buff *buff;
2271         struct tcphdr *t1, *th;
2272         struct proto *prot;
2273         struct device *dev=NULL;
2274         int tmp;
2275 
2276         /*
2277          * We need to grab some memory, and put together a FIN, 
2278          * and then put it into the queue to be sent.
2279          */
2280         sk->inuse = 1;
2281         sk->keepopen = 1;
2282         sk->shutdown = SHUTDOWN_MASK;
2283 
2284         if (!sk->dead) 
2285                 sk->state_change(sk);
2286 
2287         if (timeout == 0) 
2288         {
2289                 /*
2290                  *  We need to flush the recv. buffs.  We do this only on the
2291                  *  descriptor close, not protocol-sourced closes, because the
2292                  *  reader process may not have drained the data yet!
2293                  */
2294 
2295                 if (skb_peek(&sk->receive_queue) != NULL) 
2296                 {
2297                         struct sk_buff *skb;
2298                         if(sk->debug)
2299                                 printk("Clean rcv queue\n");
2300                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2301                                 kfree_skb(skb, FREE_READ);
2302                         if(sk->debug)
2303                                 printk("Cleaned.\n");
2304                 }
2305         }
2306 
2307         /*
2308          *      Get rid off any half-completed packets. 
2309          */
2310          
2311         if (sk->partial) 
2312         {
2313                 tcp_send_partial(sk);
2314         }
2315 
2316         switch(sk->state) 
2317         {
2318                 case TCP_FIN_WAIT1:
2319                 case TCP_FIN_WAIT2:
2320                 case TCP_CLOSING:
2321                         /*
2322                          * These states occur when we have already closed out
2323                          * our end.  If there is no timeout, we do not do
2324                          * anything.  We may still be in the middle of sending
2325                          * the remainder of our buffer, for example...
2326                          * resetting the timer would be inappropriate.
2327                          *
2328                          * XXX if retransmit count reaches limit, is tcp_close()
2329                          * called with timeout == 1 ? if not, we need to fix that.
2330                          */
2331                         if (!timeout) {
2332                                 int timer_active;
2333 
2334                                 timer_active = del_timer(&sk->timer);
2335                                 if (timer_active)
2336                                         add_timer(&sk->timer);
2337                                 else
2338                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2339                         }
2340                         if (timeout) 
2341                                 tcp_time_wait(sk);
2342                         release_sock(sk);
2343                         return; /* break causes a double release - messy */
2344                 case TCP_TIME_WAIT:
2345                 case TCP_LAST_ACK:
2346                         /*
2347                          * A timeout from these states terminates the TCB.
2348                          */
2349                         if (timeout) 
2350                         {
2351                                 tcp_set_state(sk,TCP_CLOSE);
2352                         }
2353                         release_sock(sk);
2354                         return;
2355                 case TCP_LISTEN:
2356                         /* we need to drop any sockets which have been connected,
2357                            but have not yet been accepted. */
2358                         tcp_close_pending(sk, timeout);
2359                         tcp_set_state(sk,TCP_CLOSE);
2360                         release_sock(sk);
2361                         return;
2362                 case TCP_CLOSE:
2363                         release_sock(sk);
2364                         return;
2365                 case TCP_CLOSE_WAIT:
2366                 case TCP_ESTABLISHED:
2367                 case TCP_SYN_SENT:
2368                 case TCP_SYN_RECV:
2369                         prot =(struct proto *)sk->prot;
2370                         th =(struct tcphdr *)&sk->dummy_th;
2371                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2372                         if (buff == NULL) 
2373                         {
2374                                 /* This will force it to try again later. */
2375                                 /* Or it would have if someone released the socket
2376                                    first. Anyway it might work now */
2377                                 release_sock(sk);
2378                                 if (sk->state != TCP_CLOSE_WAIT)
2379                                         tcp_set_state(sk,TCP_ESTABLISHED);
2380                                 reset_timer(sk, TIME_CLOSE, 100);
2381                                 return;
2382                         }
2383                         buff->sk = sk;
2384                         buff->free = 1;
2385                         buff->len = sizeof(*t1);
2386                         buff->localroute = sk->localroute;
2387                         t1 =(struct tcphdr *) buff->data;
2388         
2389                         /*
2390                          *      Put in the IP header and routing stuff. 
2391                          */
2392                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2393                                          IPPROTO_TCP, sk->opt,
2394                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2395                         if (tmp < 0) 
2396                         {
2397                                 sk->write_seq++;        /* Very important 8) */
2398                                 kfree_skb(buff,FREE_WRITE);
2399 
2400                                 /*
2401                                  * Enter FIN_WAIT1 to await completion of
2402                                  * written out data and ACK to our FIN.
2403                                  */
2404 
2405                                 if(sk->state==TCP_ESTABLISHED)
2406                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2407                                 else
2408                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2409                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2410                                 if(timeout)
2411                                         tcp_time_wait(sk);
2412 
2413                                 release_sock(sk);
2414                                 return;
2415                         }
2416 
2417                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2418                         buff->len += tmp;
2419                         buff->dev = dev;
2420                         memcpy(t1, th, sizeof(*t1));
2421                         t1->seq = ntohl(sk->write_seq);
2422                         sk->write_seq++;
2423                         buff->h.seq = sk->write_seq;
2424                         t1->ack = 1;
2425         
2426                         /* 
2427                          *      Ack everything immediately from now on. 
2428                          */
2429 
2430                         sk->delay_acks = 0;
2431                         t1->ack_seq = ntohl(sk->acked_seq);
2432                         t1->window = ntohs(sk->window=tcp_select_window(sk));
2433                         t1->fin = 1;
2434                         t1->rst = 0;
2435                         t1->doff = sizeof(*t1)/4;
2436                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2437 
2438                         tcp_statistics.TcpOutSegs++;
2439         
2440                         if (skb_peek(&sk->write_queue) == NULL) 
2441                         {
2442                                 sk->sent_seq = sk->write_seq;
2443                                 prot->queue_xmit(sk, dev, buff, 0);
2444                         } 
2445                         else 
2446                         {
2447                                 reset_timer(sk, TIME_WRITE, sk->rto);
2448                                 if (buff->next != NULL) 
2449                                 {
2450                                         printk("tcp_close: next != NULL\n");
2451                                         skb_unlink(buff);
2452                                 }
2453                                 skb_queue_tail(&sk->write_queue, buff);
2454                         }
2455 
2456                         /*
2457                          * If established (normal close), enter FIN_WAIT1.
2458                          * If in CLOSE_WAIT, enter LAST_ACK
2459                          * If in CLOSING, remain in CLOSING
2460                          * otherwise enter FIN_WAIT2
2461                          */
2462 
2463                         if (sk->state == TCP_ESTABLISHED)
2464                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2465                         else if (sk->state == TCP_CLOSE_WAIT)
2466                                 tcp_set_state(sk,TCP_LAST_ACK);
2467                         else if (sk->state != TCP_CLOSING)
2468                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2469         }
2470         release_sock(sk);
2471 }
2472 
2473 
2474 /*
2475  * This routine takes stuff off of the write queue,
2476  * and puts it in the xmit queue.
2477  */
2478 static void
2479 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2480 {
2481         struct sk_buff *skb;
2482 
2483         /*
2484          *      The bytes will have to remain here. In time closedown will
2485          *      empty the write queue and all will be happy 
2486          */
2487 
2488         if(sk->zapped)
2489                 return;
2490 
2491         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2492                 before(skb->h.seq, sk->window_seq + 1) &&
2493                 (sk->retransmits == 0 ||
2494                  sk->timeout != TIME_WRITE ||
2495                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2496                 && sk->packets_out < sk->cong_window) 
2497         {
2498                 IS_SKB(skb);
2499                 skb_unlink(skb);
2500                 /* See if we really need to send the packet. */
2501                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2502                 {
2503                         sk->retransmits = 0;
2504                         kfree_skb(skb, FREE_WRITE);
2505                         if (!sk->dead) 
2506                                 sk->write_space(sk);
2507                 } 
2508                 else
2509                 {
2510                         struct tcphdr *th;
2511                         struct iphdr *iph;
2512                         int size;
2513 /*
2514  * put in the ack seq and window at this point rather than earlier,
2515  * in order to keep them monotonic.  We really want to avoid taking
2516  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2517  * Ack and window will in general have changed since this packet was put
2518  * on the write queue.
2519  */
2520                         iph = (struct iphdr *)(skb->data +
2521                                                skb->dev->hard_header_len);
2522                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2523                         size = skb->len - (((unsigned char *) th) - skb->data);
2524                         
2525                         th->ack_seq = ntohl(sk->acked_seq);
2526                         th->window = ntohs(tcp_select_window(sk));
2527 
2528                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2529 
2530                         sk->sent_seq = skb->h.seq;
2531                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2532                 }
2533         }
2534 }
2535 
2536 
2537 /*
2538  *      This routine deals with incoming acks, but not outgoing ones.
 *
 *      It validates the acknowledgement number of a received segment,
 *      updates the send window and congestion window, frees acknowledged
 *      segments from the retransmit list (sk->send_head), samples the
 *      round trip time, restarts or deletes the socket timer, advances
 *      the closing states (LAST_ACK, FIN_WAIT1, CLOSING) and finally
 *      decides whether to retransmit.
 *
 *      sk    - socket whose send-side state is updated
 *      th    - TCP header of the received segment (ack_seq, window and
 *              doff are read from it)
 *      saddr - not referenced anywhere in this function
 *      len   - compared against th->doff*4 to tell whether the segment
 *              carries anything beyond its TCP header
 *
 *      Returns 0 when the ack is rejected: it acknowledges more than
 *      sk->sent_seq, or it is older than sk->rcv_ack_seq while the socket
 *      is in neither ESTABLISHED nor CLOSE_WAIT.  Returns 1 in every
 *      other case, including the early exit for a zapped socket.
2539  */
2540 
2541 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
2542 {
2543         unsigned long ack;
2544         int flag = 0;
2545 
2546         /* 
2547          * 1 - there was data in packet as well as ack or new data is sent or 
2548          *     in shutdown state
2549          * 2 - data from retransmit queue was acked and removed
2550          * 4 - window shrunk or data from retransmit queue was acked and removed
          *
          * flag is purely local: it is never returned, it only feeds the
          * RTT sampling test and the retransmit test at the end.
2551          */
2552 
2553         if(sk->zapped)
2554                 return(1);      /* Dead, can't ack any more so why bother */
2555 
2556         ack = ntohl(th->ack_seq);
             /*
              * Peer advertised a larger window than any seen so far:
              * remember it and re-derive mss as min(max_window, mtu)
              * (half the window when CONFIG_INET_PCTCP is set).
              */
2557         if (ntohs(th->window) > sk->max_window) 
2558         {
2559                 sk->max_window = ntohs(th->window);
2560 #ifdef CONFIG_INET_PCTCP
2561                 sk->mss = min(sk->max_window>>1, sk->mtu);
2562 #else
2563                 sk->mss = min(sk->max_window, sk->mtu);
2564 #endif  
2565         }
2566 
             /* Any ack seen while the TIME_KEEPOPEN timer is current clears the retransmit count. */
2567         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2568                 sk->retransmits = 0;
2569 
             /*
              * The ack falls outside [rcv_ack_seq, sent_seq]: it either
              * acknowledges something not yet sent or is older than the
              * last ack accepted.
              */
2570         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2571         {
2572                 if(sk->debug)
2573                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2574                         
2575                 /*
2576                  *      Keepalive processing.
2577                  */
2578                  
2579                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2580                 {
2581                         return(0);
2582                 }
                     /* Old ack on a live connection: accepted, and it restarts a pending keepalive timer. */
2583                 if (sk->keepopen) 
2584                 {
2585                         if(sk->timeout==TIME_KEEPOPEN)
2586                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2587                 }
2588                 return(1);
2589         }
2590 
             /* Segment length differs from the header length, so something follows the header. */
2591         if (len != th->doff*4) 
2592                 flag |= 1;
2593 
2594         /* See if our window has been shrunk. */
2595 
2596         if (after(sk->window_seq, ack+ntohs(th->window))) 
2597         {
2598                 /*
2599                  * We may need to move packets from the send queue
2600                  * to the write queue, if the window has been shrunk on us.
2601                  * The RFC says you are not allowed to shrink your window
2602                  * like this, but if the other end does, you must be able
2603                  * to deal with it.
2604                  */
2605                 struct sk_buff *skb;
2606                 struct sk_buff *skb2;
2607                 struct sk_buff *wskb = NULL;
2608         
                     /* Detach the whole retransmit list; it is rebuilt below. */
2609                 skb2 = sk->send_head;
2610                 sk->send_head = NULL;
2611                 sk->send_tail = NULL;
2612         
2613                 flag |= 4;
2614         
2615                 sk->window_seq = ack + ntohs(th->window);
                     /*
                      * Walk the old list (chained through link3).  Segments
                      * ending beyond the new window edge go back to the front
                      * of write_queue, keeping their relative order via wskb;
                      * the rest are re-chained onto send_head/send_tail.
                      * The relinking is bracketed by cli()/sti(), presumably
                      * so interrupt-time code never sees a half-built list.
                      */
2616                 cli();
2617                 while (skb2 != NULL) 
2618                 {
2619                         skb = skb2;
2620                         skb2 = skb->link3;
2621                         skb->link3 = NULL;
2622                         if (after(skb->h.seq, sk->window_seq)) 
2623                         {
2624                                 if (sk->packets_out > 0) 
2625                                         sk->packets_out--;
2626                                 /* We may need to remove this from the dev send list. */
2627                                 if (skb->next != NULL) 
2628                                 {
2629                                         skb_unlink(skb);                                
2630                                 }
2631                                 /* Now add it to the write_queue. */
2632                                 if (wskb == NULL)
2633                                         skb_queue_head(&sk->write_queue,skb);
2634                                 else
2635                                         skb_append(wskb,skb);
2636                                 wskb = skb;
2637                         } 
2638                         else 
2639                         {
2640                                 if (sk->send_head == NULL) 
2641                                 {
2642                                         sk->send_head = skb;
2643                                         sk->send_tail = skb;
2644                                 }
2645                                 else
2646                                 {
2647                                         sk->send_tail->link3 = skb;
2648                                         sk->send_tail = skb;
2649                                 }
2650                                 skb->link3 = NULL;
2651                         }
2652                 }
2653                 sti();
2654         }
2655 
             /* If either end of the retransmit list is NULL, treat it as empty and zero the in-flight count. */
2656         if (sk->send_tail == NULL || sk->send_head == NULL) 
2657         {
2658                 sk->send_head = NULL;
2659                 sk->send_tail = NULL;
2660                 sk->packets_out= 0;
2661         }
2662 
             /* New right edge of the send window (already stored above when the window shrank). */
2663         sk->window_seq = ack + ntohs(th->window);
2664 
2665         /* We don't want too many packets out there. */
             /* Growth only happens under the TIME_WRITE timer, for an ack that advances rcv_ack_seq, and stops at 2048. */
2666         if (sk->timeout == TIME_WRITE && 
2667                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2668         {
2669 /* 
2670  * This is Jacobson's slow start and congestion avoidance. 
2671  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2672  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2673  * counter and increment it once every cwnd times.  It's possible
2674  * that this should be done only if sk->retransmits == 0.  I'm
2675  * interpreting "new data is acked" as including data that has
2676  * been retransmitted but is just now being acked.
2677  */
2678                 if (sk->cong_window < sk->ssthresh)  
2679                   /* 
2680                    *    In "safe" area, increase
2681                    */
2682                         sk->cong_window++;
2683                 else 
2684                 {
2685                   /*
2686                    *    In dangerous area, increase slowly.  In theory this is
2687                    *    sk->cong_window += 1 / sk->cong_window
2688                    */
2689                         if (sk->cong_count >= sk->cong_window) 
2690                         {
2691                                 sk->cong_window++;
2692                                 sk->cong_count = 0;
2693                         }
2694                         else 
2695                                 sk->cong_count++;
2696                 }
2697         }
2698 
             /* From here on rcv_ack_seq is the value carried by this segment. */
2699         sk->rcv_ack_seq = ack;
2700 
2701         /*
2702          * if this ack opens up a zero window, clear backoff.  It was
2703          * being used to time the probes, and is probably far higher than
2704          * it needs to be for normal retransmission.
2705          */
2706 
2707         if (sk->timeout == TIME_PROBE0) 
2708         {
2709                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2710                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2711                 {
2712                         sk->retransmits = 0;
2713                         sk->backoff = 0;
2714                   /*
2715                    *    Recompute rto from rtt.  this eliminates any backoff.
2716                    */
2717 
2718                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2719                         if (sk->rto > 120*HZ)
2720                                 sk->rto = 120*HZ;
2721                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2722                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2723                                                    .2 of a second is going to need huge windows (SIGH) */
2724                                 sk->rto = 20;
2725                 }
2726         }
2727 
2728   /* 
2729    *    See if we can take anything off of the retransmit queue.
2730    */
2731    
2732         while(sk->send_head != NULL) 
2733         {
2734                 /* Check for a bug. */
2735                 if (sk->send_head->link3 &&
2736                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2737                         printk("INET: tcp.c: *** bug send_list out of order.\n");
                     /* Head segment is fully covered by this ack: account for it and free it. */
2738                 if (before(sk->send_head->h.seq, ack+1)) 
2739                 {
2740                         struct sk_buff *oskb;   
2741                         if (sk->retransmits) 
2742                         {       
2743                                 /*
2744                                  *      We were retransmitting.  don't count this in RTT est 
2745                                  */
2746                                 flag |= 2;
2747 
2748                                 /*
2749                                  * even though we've gotten an ack, we're still
2750                                  * retransmitting as long as we're sending from
2751                                  * the retransmit queue.  Keeping retransmits non-zero
2752                                  * prevents us from getting new data interspersed with
2753                                  * retransmissions.
2754                                  */
2755 
2756                                 if (sk->send_head->link3)
2757                                         sk->retransmits = 1;
2758                                 else
2759                                         sk->retransmits = 0;
2760                         }
2761                         /*
2762                          * Note that we only reset backoff and rto in the
2763                          * rtt recomputation code.  And that doesn't happen
2764                          * if there were retransmissions in effect.  So the
2765                          * first new packet after the retransmissions is
2766                          * sent with the backoff still in effect.  Not until
2767                          * we get an ack from a non-retransmitted packet do
2768                          * we reset the backoff and rto.  This allows us to deal
2769                          * with a situation where the network delay has increased
2770                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2771                          */
2772 
2773                         /*
2774                          *      We have one less packet out there. 
2775                          */
2776                          
2777                         if (sk->packets_out > 0) 
2778                                 sk->packets_out --;
2779                         /* 
2780                          *      Wake up the process, it can probably write more. 
2781                          */
2782                         if (!sk->dead) 
2783                                 sk->write_space(sk);
2784                         oskb = sk->send_head;
2785 
                             /*
                              * Bit 2 is set either just above (retransmitting)
                              * or after the first segment has been freed below,
                              * so at most the first segment acknowledged in one
                              * call contributes an RTT sample.
                              */
2786                         if (!(flag&2)) 
2787                         {
2788                                 long m;
2789         
2790                                 /*
2791                                  *      The following amusing code comes from Jacobson's
2792                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2793                                  *      are scaled versions of rtt and mean deviation.
2794                                  *      This is designed to be as fast as possible 
2795                                  *      m stands for "measurement".
2796                                  */
2797         
2798                                 m = jiffies - oskb->when;  /* RTT */
2799                                 if(m<=0)
2800                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2801                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2802                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2803                                 if (m < 0)
2804                                         m = -m;         /* m is now abs(error) */
2805                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2806                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2807         
2808                                 /*
2809                                  *      Now update timeout.  Note that this removes any backoff.
2810                                  */
2811                          
2812                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2813                                 if (sk->rto > 120*HZ)
2814                                         sk->rto = 120*HZ;
2815                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2816                                         sk->rto = 20;
2817                                 sk->backoff = 0;
2818                         }
2819                         flag |= (2|4);
                             /* Re-read the list head and unlink it between cli() and sti(). */
2820                         cli();
2821                         oskb = sk->send_head;
2822                         IS_SKB(oskb);
2823                         sk->send_head = oskb->link3;
2824                         if (sk->send_head == NULL) 
2825                         {
2826                                 sk->send_tail = NULL;
2827                         }
2828 
2829                 /*
2830                  *      We may need to remove this from the dev send list. 
2831                  */
2832 
2833                         if (oskb->next)
2834                                 skb_unlink(oskb);
2835                         sti();
2836                         kfree_skb(oskb, FREE_WRITE); /* write. */
2837                         if (!sk->dead) 
2838                                 sk->write_space(sk);
2839                 }
2840                 else
2841                 {
                             /* First segment not covered by this ack: stop scanning. */
2842                         break;
2843                 }
2844         }
2845 
2846         /*
2847          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2848          * returns non-NULL, we completely ignore the timer stuff in the else
2849          * clause.  We ought to organize the code so that else clause can
2850          * (should) be executed regardless, possibly moving the PROBE timer
2851          * reset over.  The skb_peek() thing should only move stuff to the
2852          * write queue, NOT also manage the timer functions.
2853          */
2854 
2855         /*
2856          * Maybe we can take some stuff off of the write queue,
2857          * and put it onto the xmit queue.
2858          */
2859         if (skb_peek(&sk->write_queue) != NULL) 
2860         {
                     /* Same send test that tcp_write_xmit() applies to the head of the write queue. */
2861                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2862                         (sk->retransmits == 0 || 
2863                          sk->timeout != TIME_WRITE ||
2864                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2865                         && sk->packets_out < sk->cong_window) 
2866                 {
2867                         flag |= 1;
2868                         tcp_write_xmit(sk);
2869                 }
                     /*
                      * Head segment lies beyond the window and nothing is
                      * awaiting an ack: arm the TIME_PROBE0 timer.
                      */
2870                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2871                         sk->send_head == NULL &&
2872                         sk->ack_backlog == 0 &&
2873                         sk->state != TCP_TIME_WAIT) 
2874                 {
2875                         reset_timer(sk, TIME_PROBE0, sk->rto);
2876                 }               
2877         }
2878         else
2879         {
2880                 /*
2881                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2882                  * from TCP_CLOSE we don't do anything
2883                  *
2884                  * from anything else, if there is write data (or fin) pending,
2885                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2886                  * a KEEPALIVE timeout, else we delete the timer.
2887                  *
2888                  * We do not set flag for nominal write data, otherwise we may
2889                  * force a state where we start to write itsy bitsy tidbits
2890                  * of data.
2891                  */
2892 
2893                 switch(sk->state) {
2894                 case TCP_TIME_WAIT:
2895                         /*
2896                          * keep us in TIME_WAIT until we stop getting packets,
2897                          * reset the timeout.
2898                          */
2899                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2900                         break;
2901                 case TCP_CLOSE:
2902                         /*
2903                          * don't touch the timer.
2904                          */
2905                         break;
2906                 default:
2907                         /*
2908                          * must check send_head, write_queue, and ack_backlog
2909                          * to determine which timeout to use.
2910                          */
2911                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2912                                 reset_timer(sk, TIME_WRITE, sk->rto);
2913                         } else if (sk->keepopen) {
2914                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2915                         } else {
2916                                 delete_timer(sk);
2917                         }
2918                         break;
2919                 }
                     /*
                      * Everything up to the #endif is compiled only when
                      * NOTDEF is defined; it looks like an earlier form of
                      * the switch above, kept for reference.
                      */
2920 #ifdef NOTDEF
2921                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2922                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2923                 {
2924                         if (!sk->dead)
2925                                 sk->write_space(sk);
2926                         if (sk->keepopen) {
2927                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2928                         } else {
2929                                 delete_timer(sk);
2930                         }
2931                 }
2932                 else
2933                 {
2934                         if (sk->state != (unsigned char) sk->keepopen) 
2935                         {
2936                                 reset_timer(sk, TIME_WRITE, sk->rto);
2937                         }
2938                         if (sk->state == TCP_TIME_WAIT) 
2939                         {
2940                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2941                         }       
2942                 }
2943 #endif
2944         }
2945 
             /*
              * Nothing in flight and nothing queued, but a partial segment
              * is being held in sk->partial: pass it to tcp_send_partial().
              */
2946         if (sk->packets_out == 0 && sk->partial != NULL &&
2947                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2948         {
2949                 flag |= 1;
2950                 tcp_send_partial(sk);
2951         }
2952 
2953         /*
2954          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2955          * we are now waiting for an acknowledge to our FIN.  The other end is
2956          * already in TIME_WAIT.
2957          *
2958          * Move to TCP_CLOSE on success.
          *
          * NOTE(review): the code below calls tcp_time_wait() rather than
          * setting TCP_CLOSE directly; confirm which one is intended
          * (RFC 793 moves LAST-ACK to CLOSED once the FIN is acknowledged).
2959          */
2960 
2961         if (sk->state == TCP_LAST_ACK) 
2962         {
2963                 if (!sk->dead)
2964                         sk->state_change(sk);
                     /* Our FIN is acked (rcv_ack_seq == write_seq) and everything up to the peer's FIN has been acked by us. */
2965                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2966                 {
2967                         flag |= 1;
2968                         tcp_time_wait(sk);
2969                         sk->shutdown = SHUTDOWN_MASK;
2970                 }
2971         }
2972 
2973         /*
2974          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2975          *
2976          * Move to FIN_WAIT2 to await a FIN from the other end. Set
2977          * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
2978          */
2979 
2980         if (sk->state == TCP_FIN_WAIT1) 
2981         {
2982 
2983                 if (!sk->dead) 
2984                         sk->state_change(sk);
2985                 if (sk->rcv_ack_seq == sk->write_seq) 
2986                 {
2987                         flag |= 1;
2988                         sk->shutdown |= SEND_SHUTDOWN;
2989                         tcp_set_state(sk, TCP_FIN_WAIT2);
2990                 }
2991         }
2992 
2993         /*
2994          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2995          *
2996          *      Move to TIME_WAIT
2997          */
2998 
2999         if (sk->state == TCP_CLOSING) 
3000         {
3001 
3002                 if (!sk->dead) 
3003                         sk->state_change(sk);
3004                 if (sk->rcv_ack_seq == sk->write_seq) 
3005                 {
3006                         flag |= 1;
3007                         tcp_time_wait(sk);
3008                 }
3009         }
3010 
3011         /*
3012          * I make no guarantees about the first clause in the following
3013          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
3014          * what conditions "!flag" would be true.  However I think the rest
3015          * of the conditions would prevent that from causing any
3016          * unnecessary retransmission. 
3017          *   Clearly if the first packet has expired it should be 
3018          * retransmitted.  The other alternative, "flag&2 && retransmits", is
3019          * harder to explain:  You have to look carefully at how and when the
3020          * timer is set and with what timeout.  The most recent transmission always
3021          * sets the timer.  So in general if the most recent thing has timed
3022          * out, everything before it has as well.  So we want to go ahead and
3023          * retransmit some more.  If we didn't explicitly test for this
3024          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
3025          * would not be true.  If you look at the pattern of timing, you can
3026          * show that rto is increased fast enough that the next packet would
3027          * almost never be retransmitted immediately.  Then you'd end up
3028          * waiting for a timeout to send each packet on the retransmission
3029          * queue.  With my implementation of the Karn sampling algorithm,
3030          * the timeout would double each time.  The net result is that it would
3031          * take a hideous amount of time to recover from a single dropped packet.
3032          * It's possible that there should also be a test for TIME_WRITE, but
3033          * I think as long as "send_head != NULL" and "retransmit" is on, we've
3034          * got to be in real retransmission mode.
3035          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
3036          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
3037          * As long as no further losses occur, this seems reasonable.
3038          */
             /*
              * NOTE(review): "when + rto < jiffies" is a plain comparison;
              * presumably it gives the wrong answer if jiffies wraps -
              * confirm whether that matters here.
              */
3039         
3040         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3041                (((flag&2) && sk->retransmits) ||
3042                (sk->send_head->when + sk->rto < jiffies))) 
3043         {
3044                 ip_do_retransmit(sk, 1);
3045                 reset_timer(sk, TIME_WRITE, sk->rto);
3046         }
3047 
3048         return(1);
3049 }
3050 
3051 
3052 /*
3053  *      This routine handles the data.  If there is room in the buffer,
3054  *      it will have already been moved into it.  If there is no
3055  *      room, then we will just have to discard the packet.
      *
      *      skb   - the received frame; skb->h.th is its TCP header.
      *      sk    - the socket the frame is for.
      *      saddr - passed through to tcp_send_ack().
      *      len   - frame length including the TCP header; the header
      *              (th->doff*4 bytes) is subtracted to get skb->len.
      *
      *      The frame is either freed (a dataless frame, or data that
      *      arrives after RCV_SHUTDOWN beyond the shutdown point) or linked
      *      into sk->receive_queue.  sk->acked_seq and sk->window are then
      *      updated for the frames that are now in sequence, an ack is
      *      sent and, unless the socket is dead, sk->data_ready() is called.
      *
      *      Every path returns 0.
3056  */
3057 
3058 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
3059          unsigned long saddr, unsigned short len)
3060 {
3061         struct sk_buff *skb1, *skb2;
3062         struct tcphdr *th;
3063         int dup_dumped=0;
3064         unsigned long new_seq;
3065         struct sk_buff *tail;
3066         unsigned long shut_seq;
3067 
3068         th = skb->h.th;
3069         skb->len = len -(th->doff*4);
3070 
3071         /* The bytes in the receive read/assembly queue have increased. Needed for the
3072            low memory discard algorithm */
3073            
3074         sk->bytes_rcv += skb->len;
3075         
3076         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3077         {
3078                 /* 
3079                  *      Don't want to keep passing ack's back and forth. 
3080                  *      (someone sent us dataless, boring frame)
3081                  */
3082                 if (!th->ack)
3083                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3084                 kfree_skb(skb, FREE_READ);
3085                 return(0);
3086         }
3087         
3088         /*
3089          *      We no longer have anyone receiving data on this connection.
3090          */
3091 
3092         if(sk->shutdown & RCV_SHUTDOWN)
3093         {
3094                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3095                 
3096                 /*
3097                  *      This is subtle and not nice. When we shut down we can
3098                  *      have data in the queue and acked_seq therefore not
3099                  *      pointing to the last byte that will be read. Thus
3100                  *      the naive implementation:
3101                  *              after(new_seq,sk->acked_seq+1)
3102                  *      will cause bogus resets IFF a resend of a frame that has
3103                  *      been queued but not yet read after a shutdown has been done
3104                  *      occurred. What we do now is a bit more complex but works as
3105                  *      follows. If the queue is empty copied_seq+1 is right (+1 for FIN)
3106                  *      if the queue has data the shutdown occurs at the right edge of
3107                  *      the last packet queued +1
3108                  *
3109                  *      We can't simply ack data beyond this point as it has not
3110                  *      been, and never will be, received by an application.
3111                  */
                  /*
                   *      skb_peek() is used here only to test for an empty queue;
                   *      the frame actually examined is receive_queue.prev, the
                   *      last one queued.
                   */
3112                 tail=skb_peek(&sk->receive_queue);
3113                 if(tail!=NULL)
3114                 {
3115                         tail=sk->receive_queue.prev;
3116                         shut_seq=tail->h.th->seq+tail->len+1;
3117                 }
3118                 else
3119                         shut_seq=sk->copied_seq+1;
3120                 
                  /*
                   *      Data past the shutdown point: reset the peer, close the
                   *      socket with EPIPE and drop the frame.
                   */
3121                 if(after(new_seq,shut_seq))
3122                 {
3123                         sk->acked_seq = new_seq + th->fin;
3124                         if(sk->debug)
3125                                 printk("Data arrived on %p after close [Data right edge %lX, Socket shut on %lX] %d\n",
3126                                         sk, new_seq, shut_seq, sk->blog);
3127                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3128                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3129                         tcp_statistics.TcpEstabResets++;
3130                         tcp_set_state(sk,TCP_CLOSE);
3131                         sk->err = EPIPE;
3132                         sk->shutdown = SHUTDOWN_MASK;
3133                         kfree_skb(skb, FREE_READ);
3134                         if (!sk->dead)
3135                                 sk->state_change(sk);
3136                         return(0);
3137                 }
3138         }
3139         /*
3140          *      Now we have to walk the chain, and figure out where this one
3141          *      goes into it.  This is set up so that the last packet we received
3142          *      will be the first one we look at, that way if everything comes
3143          *      in order, there will be no performance loss, and if they come
3144          *      out of order we will be able to fit things in nicely.
3145          */
3146 
3147         /* 
3148          *      This should start at the last one, and then go around forwards.
3149          */
          /*
           *      Note that the loop below starts at receive_queue.prev and
           *      steps through skb1->prev, i.e. from the tail towards the head.
           */
3150 
3151         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3152         {
3153                 skb_queue_head(&sk->receive_queue,skb);
3154                 skb1= NULL;
3155         } 
3156         else
3157         {
3158                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3159                 {
3160                         if(sk->debug)
3161                         {
3162                                 printk("skb1=%p :", skb1);
3163                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3164                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3165                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3166                                                 sk->acked_seq);
3167                         }
3168                         
3169                         /*
3170                          *      Optimisation: Duplicate frame or extension of previous frame from
3171                          *      same sequence point (lost ack case).
3172                          *      The frame contains duplicate data or replaces a previous frame
3173                          *      discard the previous frame (safe as sk->inuse is set) and put
3174                          *      the new one in its place.
3175                          */
3176                          
3177                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3178                         {
3179                                 skb_append(skb1,skb);
3180                                 skb_unlink(skb1);
3181                                 kfree_skb(skb1,FREE_READ);
3182                                 dup_dumped=1;
3183                                 skb1=NULL;
3184                                 break;
3185                         }
3186                         
3187                         /*
3188                          *      Found where it fits
3189                          */
3190                          
3191                         if (after(th->seq+1, skb1->h.th->seq))
3192                         {
3193                                 skb_append(skb1,skb);
3194                                 break;
3195                         }
3196                         
3197                         /*
3198                          *      See if we've hit the start. If so insert.
3199                          */
3200                         if (skb1 == skb_peek(&sk->receive_queue))
3201                         {
3202                                 skb_queue_head(&sk->receive_queue, skb);
3203                                 break;
3204                         }
3205                 }
3206         }
3207 
3208         /*
3209          *      Figure out what the ack value for this frame is
3210          */
          /*
           *      The result is stored in the frame's own header field
           *      th->ack_seq; the loop further down reads it back from the
           *      queued frames as skb2->h.th->ack_seq.
           */
3211          
3212         th->ack_seq = th->seq + skb->len;
3213         if (th->syn) 
3214                 th->ack_seq++;
3215         if (th->fin)
3216                 th->ack_seq++;
3217 
3218         if (before(sk->acked_seq, sk->copied_seq)) 
3219         {
3220                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3221                 sk->acked_seq = sk->copied_seq;
3222         }
3223 
3224         /*
3225          *      Now figure out if we can ack anything.
3226          */
          /*
           *      skb1 is NULL here when the queue was empty or a duplicate
           *      was replaced (dup_dumped); otherwise it is the queue entry
           *      at which the walk above stopped.  Whatever the outer test
           *      says, nothing is done unless th->seq is before acked_seq+1.
           */
3227 
3228         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3229         {
3230                 if (before(th->seq, sk->acked_seq+1)) 
3231                 {
3232                         int newwindow;
3233 
                          /*
                           *      Advance acked_seq over this frame and shrink the
                           *      window by the same amount, never below zero.
                           */
3234                         if (after(th->ack_seq, sk->acked_seq)) 
3235                         {
3236                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3237                                 if (newwindow < 0)
3238                                         newwindow = 0;  
3239                                 sk->window = newwindow;
3240                                 sk->acked_seq = th->ack_seq;
3241                         }
3242                         skb->acked = 1;
3243 
3244                         /* 
3245                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3246                          */
3247 
3248                         if (skb->h.th->fin) 
3249                         {
3250                                 if (!sk->dead) 
3251                                         sk->state_change(sk);
3252                                 sk->shutdown |= RCV_SHUTDOWN;
3253                         }
3254           
                          /*
                           *      Do the same for the frames queued after this one,
                           *      stopping at the first that starts beyond acked_seq.
                           */
3255                         for(skb2 = skb->next;
3256                             skb2 != (struct sk_buff *)&sk->receive_queue;
3257                             skb2 = skb2->next) 
3258                         {
3259                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3260                                 {
3261                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3262                                         {
3263                                                 newwindow = sk->window -
3264                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3265                                                 if (newwindow < 0)
3266                                                         newwindow = 0;  
3267                                                 sk->window = newwindow;
3268                                                 sk->acked_seq = skb2->h.th->ack_seq;
3269                                         }
3270                                         skb2->acked = 1;
3271                                         /*
3272                                          *      When we ack the fin, we turn on
3273                                          *      the RCV_SHUTDOWN flag.
3274                                          */
3275                                         if (skb2->h.th->fin) 
3276                                         {
3277                                                 sk->shutdown |= RCV_SHUTDOWN;
3278                                                 if (!sk->dead)
3279                                                         sk->state_change(sk);
3280                                         }
3281 
3282                                         /*
3283                                          *      Force an immediate ack.
3284                                          */
3285                                          
3286                                         sk->ack_backlog = sk->max_ack_backlog;
3287                                 }
3288                                 else
3289                                 {
3290                                         break;
3291                                 }
3292                         }
3293 
3294                         /*
3295                          *      This also takes care of updating the window.
3296                          *      This if statement needs to be simplified.
3297                          */
                          /*
                           *      The immediate-ack branch is empty: its tcp_send_ack()
                           *      call is commented out.  skb->acked was set above, so
                           *      the ack goes out from the else branch of the
                           *      "if (!skb->acked)" test near the end of this function.
                           */
3298                         if (!sk->delay_acks ||
3299                             sk->ack_backlog >= sk->max_ack_backlog || 
3300                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3301         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3302                         }
3303                         else 
3304                         {
3305                                 sk->ack_backlog++;
3306                                 if(sk->debug)
3307                                         printk("Ack queued.\n");
3308                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3309                         }
3310                 }
3311         }
3312 
3313         /*
3314          *      If we've missed a packet, send an ack.
3315          *      Also start a timer to send another.
3316          */
          /*
           *      skb->acked is still clear here when the frame was not taken
           *      into acked_seq above.
           */
3317          
3318         if (!skb->acked) 
3319         {
3320         
3321         /*
3322          *      This is important.  If we don't have much room left,
3323          *      we need to throw out a few packets so we have a good
3324          *      window.  Note that mtu is used, not mss, because mss is really
3325          *      for the send side.  He could be sending us stuff as large as mtu.
3326          */
          /*
           *      Frames are dropped from the head of the receive queue and
           *      the loop stops at the first acked one.
           *      NOTE(review): nothing visible here stops skb1 from being the
           *      frame just queued (skb); th points into that frame and is
           *      used by tcp_send_ack() after the loop - confirm this is safe.
           */
3327                  
3328                 while (sk->prot->rspace(sk) < sk->mtu) 
3329                 {
3330                         skb1 = skb_peek(&sk->receive_queue);
3331                         if (skb1 == NULL) 
3332                         {
3333                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3334                                 break;
3335                         }
3336 
3337                         /*
3338                          *      Don't throw out something that has been acked. 
3339                          */
3340                  
3341                         if (skb1->acked) 
3342                         {
3343                                 break;
3344                         }
3345                 
3346                         skb_unlink(skb1);
3347                         kfree_skb(skb1, FREE_READ);
3348                 }
3349                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3350                 sk->ack_backlog++;
3351                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3352         }
3353         else
3354         {
3355                 /* This frame was marked acked above.  Send an ack carrying the current acked_seq. */
3356                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3357         }
3358 
3359         /*
3360          *      Now tell the user we may have some data. 
3361          */
3362          
3363         if (!sk->dead) 
3364         {
3365                 if(sk->debug)
3366                         printk("Data wakeup.\n");
3367                 sk->data_ready(sk,0);
3368         } 
3369         return(0);
3370 }
3371 
3372 
     /*
      *      Look at the urgent pointer of a received header.  tcp_urg()
      *      calls this for headers that have URG set.
      *
      *      Nothing happens if the urgent byte is not after sk->copied_seq,
      *      or if sk->urg_data is already non-zero and the new position is
      *      not after sk->urg_seq.  Otherwise SIGURG is sent for sk->proc
      *      (when non-zero), sk->urg_data becomes URG_NOTYET and
      *      sk->urg_seq records the position of the urgent byte.
      */
3373 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3374 {
3375         /*
3376          * Position of the urgent byte: th->seq plus the host-order
3377          * urg_ptr, less one when urg_ptr is non-zero.
3378          */
3379         unsigned long offset = ntohs(th->urg_ptr);
3380         unsigned long urg_at = th->seq + (offset ? offset - 1 : 0);
3381 
3382         /* Urgent data we have already seen and read: ignore it. */
3383         if (after(sk->copied_seq+1, urg_at))
3384                 return;
3385 
3386         /* A newer (or identical) urgent pointer is already recorded. */
3387         if (sk->urg_data && !after(urg_at, sk->urg_seq))
3388                 return;
3389 
3390         /* Positive proc goes to kill_proc(), negative (negated) to kill_pg(). */
3391         if (sk->proc > 0)
3392                 kill_proc(sk->proc, SIGURG, 1);
3393         else if (sk->proc < 0)
3394                 kill_pg(-sk->proc, SIGURG, 1);
3395 
3396         /* Record the new mark; tcp_urg() acts while urg_data is URG_NOTYET. */
3397         sk->urg_data = URG_NOTYET;
3398         sk->urg_seq = urg_at;
3399 }
3400 
     /*
      *      Urgent data handling for one received segment.
      *
      *      th points at the TCP header; len is compared against an offset
      *      counted from th with the header bytes (th->doff*4) included.
      *      saddr is not used.  When the awaited urgent byte lies inside
      *      the segment it is stored in sk->urg_data together with
      *      URG_VALID, and sk->data_ready() is called unless the socket
      *      is dead.
      *
      *      Always returns 0.
      */
3401 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3402         unsigned long saddr, unsigned long len)
3403 {
3404         unsigned long urg_off;          /* urgent byte's offset from th */
3405         unsigned char *segment = (unsigned char *) th;
3406 
3407         /* Let tcp_check_urg() look at any urgent pointer in this header. */
3408         if (th->urg)
3409                 tcp_check_urg(sk,th);
3410 
3411         /*
3412          * Only go on while an urgent byte is still awaited and its
3413          * offset into this segment lies below len.
3414          */
3415         if (sk->urg_data == URG_NOTYET) {
3416                 urg_off = sk->urg_seq - th->seq + th->doff*4;
3417                 if (urg_off < len) {
3418                         sk->urg_data = URG_VALID | segment[urg_off];
3419                         if (!sk->dead)
3420                                 sk->data_ready(sk,0);
3421                 }
3422         }
3423         return 0;
3424 }
3425 
3426 
3427 /*
3428  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3429  *
3430  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3431  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3432  *  TIME-WAIT)
3433  *
3434  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3435  *  close and we go into CLOSING (and later onto TIME-WAIT)
3436  *
3437  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3438  *
      *  SYN_RECV and SYN_SENT are handled like ESTABLISHED.  A FIN seen in
      *  CLOSE_WAIT, CLOSING or CLOSE leaves the state alone, one seen in
      *  TIME_WAIT restarts the TIME_CLOSE timer, and any state not listed
      *  in the switch is moved to LAST_ACK.
      *
      *  sk->fin_seq is set to th->seq + skb->len, plus one each for SYN
      *  and FIN.  sk->state_change() is called first unless the socket is
      *  dead.  sk->ack_backlog is incremented on every path except the
      *  TIME_WAIT and default cases, which return early.  saddr and dev
      *  are not used.  Always returns 0.
      *
3439  */
3440 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3441          unsigned long saddr, struct device *dev)
3442 {
3443         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3444 
3445         if (!sk->dead) 
3446         {
3447                 sk->state_change(sk);
3448         }
3449 
3450         switch(sk->state) 
3451         {
3452                 case TCP_SYN_RECV:
3453                 case TCP_SYN_SENT:
3454                 case TCP_ESTABLISHED:
3455                         /*
3456                          * move to CLOSE_WAIT, tcp_data() already handled
3457                          * sending the ack.
3458                          */
3459                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3460                         tcp_set_state(sk,TCP_CLOSE_WAIT);
                          /* A segment that also carries RST shuts down both directions. */
3461                         if (th->rst)
3462                                 sk->shutdown = SHUTDOWN_MASK;
3463                         break;
3464 
3465                 case TCP_CLOSE_WAIT:
3466                 case TCP_CLOSING:
3467                         /*
3468                          * received a retransmission of the FIN, do
3469                          * nothing.
3470                          */
3471                         break;
3472                 case TCP_TIME_WAIT:
3473                         /*
3474                          * received a retransmission of the FIN,
3475                          * restart the TIME_WAIT timer.
3476                          */
3477                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3478                         return(0);
3479                 case TCP_FIN_WAIT1:
3480                         /*
3481                          * This case occurs when a simultaneous close
3482                          * happens, we must ack the received FIN and
3483                          * enter the CLOSING state.
3484                          *
3485                          * XXX timeout not set properly
3486                          */
3487 
3488                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3489                         tcp_set_state(sk,TCP_CLOSING);
3490                         break;
3491                 case TCP_FIN_WAIT2:
3492                         /*
3493                          * received a FIN -- send ACK and enter TIME_WAIT
3494                          */
3495                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3496                         sk->shutdown|=SHUTDOWN_MASK;
3497                         tcp_set_state(sk,TCP_TIME_WAIT);
3498                         break;
3499                 case TCP_CLOSE:
3500                         /*
3501                          * already in CLOSE
3502                          */
3503                         break;
3504                 default:
                          /*
                           * Every state without a case of its own ends up
                           * here and is moved to LAST_ACK.
                           */
3505                         tcp_set_state(sk,TCP_LAST_ACK);
3506         
3507                         /* Start the timers. */
3508                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3509                         return(0);
3510         }
          /* Reached only through the "break" cases above: count one more pending ack. */
3511         sk->ack_backlog++;
3512 
3513         return(0);
3514 }
3515 
3516 
3517 /*
      *      This will accept the next outstanding connection.
      *
      *      sk    - the listening socket.
      *      flags - only O_NONBLOCK is looked at.
      *
      *      Returns the socket attached to the dequeued skb (skb->sk), or
      *      NULL with sk->err set to
      *        EINVAL       sk is not in TCP_LISTEN
      *        EAGAIN       nothing is pending and O_NONBLOCK was given
      *        ERESTARTSYS  an unblocked signal is pending after the sleep
      */
3518 static struct sock *
3519 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3520 {
3521         struct sock *newsk;
3522         struct sk_buff *skb;
3523   
3524   /*
3525    * We need to make sure that this socket is listening,
3526    * and that it has something pending.
3527    */
3528 
3529         if (sk->state != TCP_LISTEN) 
3530         {
3531                 sk->err = EINVAL;
3532                 return(NULL); 
3533         }
3534 
3535         /* Avoid the race. */
          /*
           * cli() comes before sk->inuse is set and sti() is called on
           * every way out of the loop below.
           */
3536         cli();
3537         sk->inuse = 1;
3538 
3539         while((skb = tcp_dequeue_established(sk)) == NULL) 
3540         {
3541                 if (flags & O_NONBLOCK) 
3542                 {
3543                         sti();
3544                         release_sock(sk);
3545                         sk->err = EAGAIN;
3546                         return(NULL);
3547                 }
3548 
                  /* Give the socket up while sleeping and take it back afterwards. */
3549                 release_sock(sk);
3550                 interruptible_sleep_on(sk->sleep);
3551                 if (current->signal & ~current->blocked) 
3552                 {
                          /*
                           * The socket was already released before the sleep,
                           * so there is no release_sock() on this path.
                           */
3553                         sti();
3554                         sk->err = ERESTARTSYS;
3555                         return(NULL);
3556                 }
3557                 sk->inuse = 1;
3558         }
3559         sti();
3560 
3561         /*
3562          *      Now all we need to do is return skb->sk. 
3563          */
3564 
3565         newsk = skb->sk;
3566 
          /* The skb is freed and one entry is taken off sk->ack_backlog. */
3567         kfree_skb(skb, FREE_READ);
3568         sk->ack_backlog--;
3569         release_sock(sk);
3570         return(newsk);
3571 }
3572 
3573 
3574 /*
3575  *      This will initiate an outgoing connection. 
      *
      *      sk       - socket to connect; it must be in TCP_CLOSE.
      *      usin     - destination address and port.  An INADDR_ANY address
      *                 is overwritten in place with ip_my_addr().
      *      addr_len - size of *usin as given by the caller; at least 8.
      *
      *      A SYN carrying one 4 byte TCP option is built and handed to
      *      sk->prot->queue_xmit(), the socket is put into TCP_SYN_SENT and
      *      the TIME_WRITE timer is armed.
      *
      *      Returns 0 once the SYN has been queued, or
      *        -EISCONN       sk->state is not TCP_CLOSE
      *        -EINVAL        addr_len < 8
      *        -EAFNOSUPPORT  sin_family is neither 0 nor AF_INET
      *        -ENETUNREACH   destination is broadcast or multicast, or
      *                       build_header() failed
      *        -ENOMEM        no buffer could be allocated for the SYN
3576  */
3577  
3578 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3579 {
3580         struct sk_buff *buff;
3581         struct device *dev=NULL;
3582         unsigned char *ptr;
3583         int tmp;
3584         int atype;
3585         struct tcphdr *t1;
3586         struct rtable *rt;
3587 
3588         if (sk->state != TCP_CLOSE) 
3589                 return(-EISCONN);
3590 
3591         if (addr_len < 8) 
3592                 return(-EINVAL);
3593 
3594         if (usin->sin_family && usin->sin_family != AF_INET) 
3595                 return(-EAFNOSUPPORT);
3596 
3597         /*
3598          *      connect() to INADDR_ANY means loopback (BSD'ism).
3599          */
3600         
3601         if(usin->sin_addr.s_addr==INADDR_ANY)
3602                 usin->sin_addr.s_addr=ip_my_addr();
3603                   
3604         /*
3605          *      Don't want a TCP connection going to a broadcast address 
3606          */
3607 
3608         if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
3609                 return -ENETUNREACH;
3610   
          /*
           *      Set up the socket.  The initial sequence number is derived
           *      from jiffies.  The socket is released again before the
           *      buffer allocation, so the ENOMEM return below needs no
           *      release_sock() of its own.
           */
3611         sk->inuse = 1;
3612         sk->daddr = usin->sin_addr.s_addr;
3613         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3614         sk->window_seq = sk->write_seq;
3615         sk->rcv_ack_seq = sk->write_seq -1;
3616         sk->err = 0;
3617         sk->dummy_th.dest = usin->sin_port;
3618         release_sock(sk);
3619 
3620         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3621         if (buff == NULL) 
3622         {
3623                 return(-ENOMEM);
3624         }
3625         sk->inuse = 1;
          /* 24 bytes: the TCP header with the 4 option bytes written below (doff = 6). */
3626         buff->len = 24;
3627         buff->sk = sk;
3628         buff->free = 1;
3629         buff->localroute = sk->localroute;
3630         
3631         t1 = (struct tcphdr *) buff->data;
3632 
3633         /*
3634          *      Put in the IP header and routing stuff. 
3635          */
          /* rt may be NULL; it is only consulted for RTF_WINDOW and RTF_MTU below. */
3636          
3637         rt=ip_rt_route(sk->daddr, NULL, NULL);
3638         
3639 
3640         /*
3641          *      We need to build the routing stuff from the things saved in skb. 
3642          */
          /*
           *      build_header() is given &dev and its return value is used
           *      as the number of header bytes placed in front of the TCP
           *      header.
           *      NOTE(review): dev starts out NULL and is dereferenced further
           *      down, so build_header() presumably always sets it on
           *      success - confirm.
           */
3643 
3644         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3645                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3646         if (tmp < 0) 
3647         {
3648                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3649                 release_sock(sk);
3650                 return(-ENETUNREACH);
3651         }
3652 
3653         buff->len += tmp;
3654         t1 = (struct tcphdr *)((char *)t1 +tmp);
3655 
          /*
           *      Start from the socket's template header.  write_seq is
           *      consumed by the SYN, hence the post-increment.
           *      NOTE(review): t1->window is stored without a byte order
           *      conversion - confirm this is intended.
           */
3656         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3657         t1->seq = ntohl(sk->write_seq++);
3658         sk->sent_seq = sk->write_seq;
3659         buff->h.seq = sk->write_seq;
3660         t1->ack = 0;
3661         t1->window = 2;
3662         t1->res1=0;
3663         t1->res2=0;
3664         t1->rst = 0;
3665         t1->urg = 0;
3666         t1->psh = 0;
3667         t1->syn = 1;
3668         t1->urg_ptr = 0;
3669         t1->doff = 6;
3670         /* Window clamp from the route; then mtu from the user, the route, or a default. */
3671         
3672         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3673                 sk->window_clamp=rt->rt_window;
3674         else
3675                 sk->window_clamp=0;
3676 
3677         if (sk->user_mss)
3678                 sk->mtu = sk->user_mss;
3679         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3680                 sk->mtu = rt->rt_mss;
3681         else 
3682         {
                  /*
                   *      Source and destination differ under the mask:
                   *      use 576 - HEADER_SIZE, otherwise MAX_WINDOW.
                   */
3683 #ifdef CONFIG_INET_SNARL
3684                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3685 #else
3686                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3687 #endif
3688                         sk->mtu = 576 - HEADER_SIZE;
3689                 else
3690                         sk->mtu = MAX_WINDOW;
3691         }
3692         /*
3693          *      but not bigger than device MTU 
3694          */
3695 
3696         if(sk->mtu <32)
3697                 sk->mtu = 32;   /* Sanity limit */
3698                 
3699         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3700         
3701         /*
3702          *      Put in the TCP options to say MTU. 
3703          */
          /*
           *      Option kind 2, length 4 (the maximum segment size option
           *      of RFC 793), followed by sk->mtu, high byte first.
           */
3704 
3705         ptr = (unsigned char *)(t1+1);
3706         ptr[0] = 2;
3707         ptr[1] = 4;
3708         ptr[2] = (sk->mtu) >> 8;
3709         ptr[3] = (sk->mtu) & 0xff;
3710         tcp_send_check(t1, sk->saddr, sk->daddr,
3711                   sizeof(struct tcphdr) + 4, sk);
3712 
3713         /*
3714          *      This must go first otherwise a really quick response will get reset. 
3715          */
3716 
3717         tcp_set_state(sk,TCP_SYN_SENT);
3718         sk->rto = TCP_TIMEOUT_INIT;
3719         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
          /*
           *      NOTE(review): presumably the retransmit counter is started
           *      part-way so that only TCP_SYN_RETRIES attempts remain before
           *      it reaches TCP_RETR2 - confirm against the timer code.
           */
3720         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3721 
3722         sk->prot->queue_xmit(sk, dev, buff, 0);  
3723         tcp_statistics.TcpActiveOpens++;
3724         tcp_statistics.TcpOutSegs++;
3725   
3726         release_sock(sk);
3727         return(0);
3728 }
3729 
3730 
3731 /*
      *      This function checks to see if the tcp header is actually acceptable.
      *
      *      len is the segment length including the TCP header.  opt is
      *      not used.
      *
      *      Returns 1 if at least part of the segment looks interesting.
      *      Otherwise the result is
      *        0 - the segment carried RST (nothing is sent), or an ack
      *            was sent to try to resync;
      *        1 - we are in SYN_SENT or SYN_RECV and a reset was sent.
      */
3732 static int
3733 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3734              struct options *opt, unsigned long saddr, struct device *dev)
3735 {
3736         unsigned long seg_len;  /* bytes past the header, +1 for a FIN */
3737         unsigned long seg_end;  /* th->seq + seg_len */
3738 
3739         seg_len = len - 4*th->doff;
3740         if (th->fin)
3741                 seg_len++;
3742         seg_end = seg_len + th->seq;
3743 
3744         /*
3745          * The segment is interesting when all three hold:
3746          *  - it is empty or our window is non-zero (with a zero
3747          *    window we can't have any data in the packet);
3748          *  - we have not already seen all of it;
3749          *  - it does not start beyond the window.
3750          *
3751          * This isn't quite right.  sk->acked_seq could be more recent
3752          * than sk->window.  This is however close enough.  We will
3753          * accept slightly more packets than we should, but it should
3754          * not cause problems unless someone is trying to forge
3755          * packets.
3756          */
3757         if ((!seg_len || sk->window) &&
3758             after(seg_end+1, sk->acked_seq) &&
3759             before(th->seq, sk->acked_seq + sk->window + 1))
3760                 return 1;
3761 
3762         /* Not acceptable.  A segment carrying RST gets no reply. */
3763         if (th->rst)
3764                 return 0;
3765 
3766         switch (sk->state) {
3767         case TCP_SYN_SENT:
3768         case TCP_SYN_RECV:
3769                 /*
3770                  * Unsynchronized: kill the bogus remote connection.
3771                  * Nothing is done to our end.
3772                  */
3773                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3774                 return 1;
3775         default:
3776                 break;
3777         }
3778 
3779         /* Try to resync things. */
3780         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3781         return 0;
3782 }
3783 
3784 
3785 #ifdef TCP_FASTPATH
3786 /*
3787  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3788  *      Yes if
3789  *      a) The queue is empty
3790  *      b) The last frame on the queue has the acked flag set
3791  */
     /*
      *      Returns 1 for yes and 0 for no.  The "no" case used to fall off
      *      the end of the function without a return statement, leaving
      *      the result indeterminate; it is now an explicit 0.
      */
3792 
3793 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3794 {
3795         struct sk_buff *skb = skb_peek(&sk->receive_queue);
3796 
3797         return (skb == NULL || sk->receive_queue.prev->acked);
3798 }
3799 
3800 #endif
3801 
3802 int
3803 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3804         unsigned long daddr, unsigned short len,
3805         unsigned long saddr, int redo, struct inet_protocol * protocol)
3806 {
3807         struct tcphdr *th;
3808         struct sock *sk;
3809 
3810         if (!skb) 
3811         {
3812                 return(0);
3813         }
3814 
3815         if (!dev) 
3816         {
3817                 return(0);
3818         }
3819   
3820         tcp_statistics.TcpInSegs++;
3821   
3822         if(skb->pkt_type!=PACKET_HOST)
3823         {
3824                 kfree_skb(skb,FREE_READ);
3825                 return(0);
3826         }
3827   
3828         th = skb->h.th;
3829 
3830         /*
3831          *      Find the socket.
3832          */
3833 
3834         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3835 
3836         /*
3837          *      If this socket has got a reset its to all intents and purposes 
3838          *      really dead 
3839          */
3840          
3841         if (sk!=NULL && sk->zapped)
3842                 sk=NULL;
3843 
3844         if (!redo) 
3845         {
3846                 if (tcp_check(th, len, saddr, daddr )) 
3847                 {
3848                         skb->sk = NULL;
3849                         kfree_skb(skb,FREE_READ);
3850                         /*
3851                          * We don't release the socket because it was
3852                          * never marked in use.
3853                          */
3854                         return(0);
3855                 }
3856                 th->seq = ntohl(th->seq);
3857 
3858                 /* See if we know about the socket. */
3859                 if (sk == NULL) 
3860                 {
3861                         if (!th->rst)
3862                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3863                         skb->sk = NULL;
3864                         kfree_skb(skb, FREE_READ);
3865                         return(0);
3866                 }
3867 
3868                 skb->len = len;
3869                 skb->sk = sk;
3870                 skb->acked = 0;
3871                 skb->used = 0;
3872                 skb->free = 0;
3873                 skb->saddr = daddr;
3874                 skb->daddr = saddr;
3875         
3876                 /* We may need to add it to the backlog here. */
3877                 cli();
3878                 if (sk->inuse) 
3879                 {
3880                         skb_queue_tail(&sk->back_log, skb);
3881                         sti();
3882                         return(0);
3883                 }
3884                 sk->inuse = 1;
3885                 sti();
3886         }
3887         else
3888         {
3889                 if (!sk) 
3890                 {
3891                         return(0);
3892                 }
3893         }
3894 
3895 
3896         if (!sk->prot) 
3897         {
3898                 return(0);
3899         }
3900 
3901 
3902         /*
3903          *      Charge the memory to the socket. 
3904          */
3905          
3906         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3907         {
3908                 skb->sk = NULL;
3909                 kfree_skb(skb, FREE_READ);
3910                 release_sock(sk);
3911                 return(0);
3912         }
3913 
3914         sk->rmem_alloc += skb->mem_len;
3915 
3916 #ifdef TCP_FASTPATH
3917 /*
3918  *      Incoming data stream fastpath. 
3919  *
3920  *      We try to optimise two things.
3921  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3922  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3923  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3924  *
3925  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3926  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3927  *      speed although further optimizing here is possible.
3928  */
3929  
3930         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3931         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3932         {       
3933                 /* Packets in order. Fits window */
3934                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3935                 {
3936                         /* Ack is harder */
3937                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3938                         {
3939                                 kfree_skb(skb, FREE_READ);
3940                                 release_sock(sk);
3941                                 return 0;
3942                         }
3943                         /*
3944                          *      Set up variables
3945                          */
3946                         skb->len -= (th->doff *4);
3947                         sk->bytes_rcv += skb->len;
3948                         tcp_rx_hit2++;
3949                         if(skb->len)
3950                         {
3951                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3952                                 if(sk->window >= skb->len)
3953                                         sk->window-=skb->len;                   /* We know its effect on the window */
3954                                 else
3955                                         sk->window=0;
3956                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3957                                 skb->acked=1;                           /* Guaranteed true */
3958                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3959                                         sk->bytes_rcv > sk->max_unacked)
3960                                 {
3961                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3962                                 }
3963                                 else
3964                                 {
3965                                         sk->ack_backlog++;
3966                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3967                                 }
3968                                 if(!sk->dead)
3969                                         sk->data_ready(sk,0);
3970                                 release_sock(sk);
3971                                 return 0;
3972                         }
3973                 }
3974                 /*
3975                  *      More generic case of arriving data stream in ESTABLISHED
3976                  */
3977                 tcp_rx_hit1++;
3978                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3979                 {
3980                         kfree_skb(skb, FREE_READ);
3981                         release_sock(sk);
3982                         return 0;
3983                 }
3984                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3985                 {
3986                         kfree_skb(skb, FREE_READ);
3987                         release_sock(sk);
3988                         return 0;
3989                 }
3990                 if(tcp_data(skb, sk, saddr, len))
3991                         kfree_skb(skb, FREE_READ);
3992                 release_sock(sk);
3993                 return 0;
3994         }
3995         tcp_rx_miss++;
3996 #endif  
3997 
3998         /*
3999          *      Now deal with all cases.
4000          */
4001          
4002         switch(sk->state) 
4003         {
4004         
4005                 /*
4006                  * This should close the system down if it's waiting
4007                  * for an ack that is never going to be sent.
4008                  */
4009                 case TCP_LAST_ACK:
4010                         if (th->rst) 
4011                         {
4012                                 sk->zapped=1;
4013                                 sk->err = ECONNRESET;
4014                                 tcp_set_state(sk,TCP_CLOSE);
4015                                 sk->shutdown = SHUTDOWN_MASK;
4016                                 if (!sk->dead) 
4017                                 {
4018                                         sk->state_change(sk);
4019                                 }
4020                                 kfree_skb(skb, FREE_READ);
4021                                 release_sock(sk);
4022                                 return(0);
4023                         }
4024 
4025                 case TCP_ESTABLISHED:
4026                 case TCP_CLOSE_WAIT:
4027                 case TCP_CLOSING:
4028                 case TCP_FIN_WAIT1:
4029                 case TCP_FIN_WAIT2:
4030                 case TCP_TIME_WAIT:
4031 
4032                         /*
4033                          * is it a good packet?
4034                          */
4035 
4036                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4037                         {
4038                                 kfree_skb(skb, FREE_READ);
4039                                 release_sock(sk);
4040                                 return(0);
4041                         }
4042 
4043                         if (th->rst) 
4044                         {
4045                                 tcp_statistics.TcpEstabResets++;
4046                                 sk->zapped=1;
4047                                 /* This means the thing should really be closed. */
4048                                 sk->err = ECONNRESET;
4049                                 if (sk->state == TCP_CLOSE_WAIT) 
4050                                 {
4051                                         sk->err = EPIPE;
4052                                 }
4053         
4054                                 /*
4055                                  * A reset with a fin just means that
4056                                  * the data was not all read.
4057                                  */
4058                                 tcp_set_state(sk,TCP_CLOSE);
4059                                 sk->shutdown = SHUTDOWN_MASK;
4060                                 if (!sk->dead) 
4061                                 {
4062                                         sk->state_change(sk);
4063                                 }
4064                                 kfree_skb(skb, FREE_READ);
4065                                 release_sock(sk);
4066                                 return(0);
4067                         }
4068                         if (th->syn) 
4069                         {
4070                                 long seq=sk->write_seq;
4071                                 int st=sk->state;
4072                                 tcp_statistics.TcpEstabResets++;
4073                                 sk->err = ECONNRESET;
4074                                 tcp_set_state(sk,TCP_CLOSE);
4075                                 sk->shutdown = SHUTDOWN_MASK;
4076                                 if(sk->debug)
4077                                         printk("Socket %p reset by SYN while established.\n", sk);
4078                                 if (!sk->dead) {
4079                                         sk->state_change(sk);
4080                                 }
4081                                 /*
4082                                  *      The BSD port reuse protocol violation.
4083                                  *      I do sometimes wonder how the *bsd people
4084                                  *      have the nerve to talk about 'standards'.
4085                                  *
4086                                  *      If seq > last used on connection then
4087                                  *      open a new connection and use 128000+seq of
4088                                  *      old connection.
4089                                  *
4090                                  */
4091                                  
4092                                 if(st==TCP_TIME_WAIT && th->seq > sk->acked_seq && sk->dead)
4093                                 {
4094                                         struct sock *psk=sk;
4095                                         /*
4096                                          *      Find the listening socket.
4097                                          */
4098                                         sk=get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
4099                                         if(sk && sk->state==TCP_LISTEN)
4100                                         {
4101                                                 sk->inuse=1;
4102                                                 tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
4103                                                 release_sock(psk);
4104                                                 /* Fall through in case people are
4105                                                    also using the piggy backed SYN + data 
4106                                                    protocol violation */
4107                                         }
4108                                         else
4109                                         {
4110                                                 tcp_reset(daddr, saddr,  th, psk->prot, opt,dev, psk->ip_tos,psk->ip_ttl);
4111                                                 release_sock(psk);
4112                                                 kfree_skb(skb, FREE_READ);
4113                                                 return 0;
4114                                         }                       
4115                                 }
4116                                 else
4117                                 {
4118                                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4119                                         kfree_skb(skb, FREE_READ);
4120                                         release_sock(sk);
4121                                         return(0);
4122                                 }
4123                         }       
4124                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4125                                 kfree_skb(skb, FREE_READ);
4126                                 release_sock(sk);
4127                                 return(0);
4128                         }
4129         
4130                         if (tcp_urg(sk, th, saddr, len)) {
4131                                 kfree_skb(skb, FREE_READ);
4132                                 release_sock(sk);
4133                                 return(0);
4134                         }
4135 
4136         
4137                         if (tcp_data(skb, sk, saddr, len)) {
4138                                 kfree_skb(skb, FREE_READ);
4139                                 release_sock(sk);
4140                                 return(0);
4141                         }       
4142 
4143                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4144                                 kfree_skb(skb, FREE_READ);
4145                                 release_sock(sk);
4146                                 return(0);
4147                         }
4148         
4149                         release_sock(sk);
4150                         return(0);
4151 
4152 
4153                 case TCP_CLOSE:
4154                         if (sk->dead || sk->daddr) {
4155                                 kfree_skb(skb, FREE_READ);
4156                                         release_sock(sk);
4157                                 return(0);
4158                         }
4159         
4160                         if (!th->rst) {
4161                                 if (!th->ack)
4162                                         th->ack_seq = 0;
4163                                 if(sk->debug) printk("Reset on closed socket %s.\n",sk->blog);
4164                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4165                         }
4166                         kfree_skb(skb, FREE_READ);
4167                         release_sock(sk);
4168                                 return(0);
4169         
4170                 case TCP_LISTEN:
4171                         if (th->rst) {
4172                                 kfree_skb(skb, FREE_READ);
4173                                 release_sock(sk);
4174                                 return(0);
4175                         }
4176                         if (th->ack) {
4177                                 printk("Reset on listening socket %d.\n",sk->blog);
4178                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4179                                 kfree_skb(skb, FREE_READ);
4180                                 release_sock(sk);
4181                                 return(0);
4182                         }
4183         
4184                         if (th->syn) 
4185                         {
4186                                 /*
4187                                  * Now we just put the whole thing including
4188                                  * the header and saddr, and protocol pointer
4189                                  * into the buffer.  We can't respond until the
4190                                  * user tells us to accept the connection.
4191                                  */
4192                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
4193                                 release_sock(sk);
4194                                 return(0);
4195                         }
4196 
4197                         kfree_skb(skb, FREE_READ);
4198                         release_sock(sk);
4199                         return(0);
4200 
4201                 case TCP_SYN_RECV:
4202                         if (th->syn) {
4203                                 /* Probably a retransmitted syn */
4204                                 kfree_skb(skb, FREE_READ);
4205                                 release_sock(sk);
4206                                 return(0);
4207                         }
4208         
4209         
4210                 default:
4211                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4212                         {
4213                                 kfree_skb(skb, FREE_READ);
4214                                 release_sock(sk);
4215                                 return(0);
4216                         }
4217         
4218                 case TCP_SYN_SENT:
4219                         if (th->rst) 
4220                         {
4221                                 tcp_statistics.TcpAttemptFails++;
4222                                 sk->err = ECONNREFUSED;
4223                                 tcp_set_state(sk,TCP_CLOSE);
4224                                 sk->shutdown = SHUTDOWN_MASK;
4225                                 sk->zapped = 1;
4226                                 if (!sk->dead) 
4227                                 {
4228                                         sk->state_change(sk);
4229                                 }
4230                                 kfree_skb(skb, FREE_READ);
4231                                 release_sock(sk);
4232                                 return(0);
4233                         }
4234                         if (!th->ack) 
4235                         {
4236                                 if (th->syn) 
4237                                 {
4238                                         /* Crossed SYN's are fine - but talking to
4239                                            yourself is right out... */
4240                                         if(sk->saddr==saddr && sk->daddr==daddr &&
4241                                                 sk->dummy_th.source==th->source &&
4242                                                 sk->dummy_th.dest==th->dest)
4243                                         {
4244                                                 tcp_statistics.TcpAttemptFails++;
4245                                                 sk->err = ECONNREFUSED;
4246                                                 tcp_set_state(sk,TCP_CLOSE);
4247                                                 sk->shutdown = SHUTDOWN_MASK;
4248                                                 sk->zapped = 1;
4249                                                 if (!sk->dead) 
4250                                                 {
4251                                                         sk->state_change(sk);
4252                                                 }
4253                                                 kfree_skb(skb, FREE_READ);
4254                                                 release_sock(sk);
4255                                                 return(0);
4256                                         }
4257                                         tcp_set_state(sk,TCP_SYN_RECV);
4258                                 }
4259                                 kfree_skb(skb, FREE_READ);
4260                                 release_sock(sk);
4261                                 return(0);
4262                         }
4263         
4264                         switch(sk->state) 
4265                         {
4266                                 case TCP_SYN_SENT:
4267                                         if (!tcp_ack(sk, th, saddr, len)) 
4268                                         {
4269                                                 tcp_statistics.TcpAttemptFails++;
4270                                                 tcp_reset(daddr, saddr, th,
4271                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4272                                                 kfree_skb(skb, FREE_READ);
4273                                                         release_sock(sk);
4274                                                 return(0);
4275                                         }
4276         
4277                                         /*
4278                                          * If the syn bit is also set, switch to
4279                                          * tcp_syn_recv, and then to established.
4280                                          */
4281                                         if (!th->syn) 
4282                                         {
4283                                                 kfree_skb(skb, FREE_READ);
4284                                                 release_sock(sk);
4285                                                 return(0);
4286                                         }
4287         
4288                                         /* Ack the syn and fall through. */
4289                                         sk->acked_seq = th->seq+1;
4290                                         sk->fin_seq = th->seq;
4291                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4292                                                 sk, th, sk->daddr);
4293                 
4294                                 case TCP_SYN_RECV:
4295                                         if (!tcp_ack(sk, th, saddr, len)) 
4296                                         {
4297                                                 tcp_statistics.TcpAttemptFails++;
4298                                                 tcp_reset(daddr, saddr, th,
4299                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4300                                                 kfree_skb(skb, FREE_READ);
4301                                                 release_sock(sk);
4302                                                 return(0);
4303                                         }
4304         
4305                                         tcp_set_state(sk,TCP_ESTABLISHED);
4306         
4307                                         /*
4308                                          *      Now we need to finish filling out
4309                                          *      some of the tcp header.
4310                                          * 
4311                                          *      We need to check for mtu info. 
4312                                          */
4313                                         tcp_options(sk, th);
4314                                         sk->dummy_th.dest = th->source;
4315                                         sk->copied_seq = sk->acked_seq-1;
4316                                         if (!sk->dead) 
4317                                         {
4318                                                 sk->state_change(sk);
4319                                         }
4320         
4321                                         /*
4322                                          * We've already processed his first
4323                                          * ack.  In just about all cases that
4324                                          * will have set max_window.  This is
4325                                          * to protect us against the possibility
4326                                          * that the initial window he sent was 0.
4327                                          * This must occur after tcp_options, which
4328                                          * sets sk->mtu.
4329                                          */
4330                                         if (sk->max_window == 0) 
4331                                         {
4332                                                 sk->max_window = 32;
4333                                                 sk->mss = min(sk->max_window, sk->mtu);
4334                                         }
4335 
4336                                         /*
4337                                          * Now process the rest like we were
4338                                          * already in the established state.
4339                                          */
4340                                         if (th->urg) 
4341                                         {
4342                                                 if (tcp_urg(sk, th, saddr, len)) 
4343                                                 { 
4344                                                         kfree_skb(skb, FREE_READ);
4345                                                         release_sock(sk);
4346                                                         return(0);
4347                                                 }
4348                                         }
4349                                         if (tcp_data(skb, sk, saddr, len))
4350                                                 kfree_skb(skb, FREE_READ);
4351 
4352                                         if (th->fin)
4353                                                 tcp_fin(skb, sk, th, saddr, dev);
4354                                         release_sock(sk);
4355                                         return(0);
4356                         }
4357         
4358                         if (th->urg) 
4359                         {
4360                                 if (tcp_urg(sk, th, saddr, len)) 
4361                                 {
4362                                         kfree_skb(skb, FREE_READ);
4363                                         release_sock(sk);
4364                                         return(0);
4365                                 }
4366                         }
4367                         if (tcp_data(skb, sk, saddr, len)) 
4368                         {
4369                                 kfree_skb(skb, FREE_READ);
4370                                 release_sock(sk);
4371                                 return(0);
4372                         }
4373         
4374                         if (!th->fin) 
4375                         {
4376                                 release_sock(sk);
4377                                 return(0);
4378                         }
4379                         tcp_fin(skb, sk, th, saddr, dev);
4380                         release_sock(sk);
4381                         return(0);
4382         }
4383 }
4384 
4385 
4386 /*
4387  * This routine sends a packet with an out of date sequence
4388  * number. It assumes the other end will try to ack it.
4389  */
4390 
4391 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4392 {
4393         struct sk_buff *buff;
4394         struct tcphdr *t1;
4395         struct device *dev=NULL;
4396         int tmp;
4397 
4398         if (sk->zapped)
4399                 return; /* After a valid reset we can send no more */
4400 
4401         /*
4402          * Write data can still be transmitted/retransmitted in the
4403          * following states.  If any other state is encountered, return.
4404          */
4405 
4406         if (sk->state != TCP_ESTABLISHED && 
4407             sk->state != TCP_CLOSE_WAIT &&
4408             sk->state != TCP_FIN_WAIT1 && 
4409             sk->state != TCP_LAST_ACK &&
4410             sk->state != TCP_CLOSING
4411         ) {
4412                 return;
4413         }
4414 
4415         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4416         if (buff == NULL) 
4417                 return;
4418 
4419         buff->len = sizeof(struct tcphdr);
4420         buff->free = 1;
4421         buff->sk = sk;
4422         buff->localroute = sk->localroute;
4423 
4424         t1 = (struct tcphdr *) buff->data;
4425 
4426         /* Put in the IP header and routing stuff. */
4427         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4428                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4429         if (tmp < 0) 
4430         {
4431                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4432                 return;
4433         }
4434 
4435         buff->len += tmp;
4436         t1 = (struct tcphdr *)((char *)t1 +tmp);
4437 
4438         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4439 
4440         /*
4441          * Use a previous sequence.
4442          * This should cause the other end to send an ack.
4443          */
4444         t1->seq = htonl(sk->sent_seq-1);
4445         t1->ack = 1; 
4446         t1->res1= 0;
4447         t1->res2= 0;
4448         t1->rst = 0;
4449         t1->urg = 0;
4450         t1->psh = 0;
4451         t1->fin = 0;
4452         t1->syn = 0;
4453         t1->ack_seq = ntohl(sk->acked_seq);
4454         t1->window = ntohs(tcp_select_window(sk));
4455         t1->doff = sizeof(*t1)/4;
4456         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4457 
4458          /*     Send it and free it.
4459           *     This will prevent the timer from automatically being restarted.
4460           */
4461         sk->prot->queue_xmit(sk, dev, buff, 1);
4462         tcp_statistics.TcpOutSegs++;
4463 }
4464 
4465 void
4466 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4467 {
4468         if (sk->zapped)
4469                 return;         /* After a valid reset we can send no more */
4470 
4471         tcp_write_wakeup(sk);
4472 
4473         sk->backoff++;
4474         sk->rto = min(sk->rto << 1, 120*HZ);
4475         reset_timer (sk, TIME_PROBE0, sk->rto);
4476         sk->retransmits++;
4477         sk->prot->retransmits ++;
4478 }
4479 
4480 /*
4481  *      Socket option code for TCP. 
4482  */
4483   
4484 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4485 {
4486         int val,err;
4487 
4488         if(level!=SOL_TCP)
4489                 return ip_setsockopt(sk,level,optname,optval,optlen);
4490 
4491         if (optval == NULL) 
4492                 return(-EINVAL);
4493 
4494         err=verify_area(VERIFY_READ, optval, sizeof(int));
4495         if(err)
4496                 return err;
4497         
4498         val = get_fs_long((unsigned long *)optval);
4499 
4500         switch(optname)
4501         {
4502                 case TCP_MAXSEG:
4503 /*
4504  * values greater than interface MTU won't take effect.  however at
4505  * the point when this call is done we typically don't yet know
4506  * which interface is going to be used
4507  */
4508                         if(val<1||val>MAX_WINDOW)
4509                                 return -EINVAL;
4510                         sk->user_mss=val;
4511                         return 0;
4512                 case TCP_NODELAY:
4513                         sk->nonagle=(val==0)?0:1;
4514                         return 0;
4515                 default:
4516                         return(-ENOPROTOOPT);
4517         }
4518 }
4519 
4520 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4521 {
4522         int val,err;
4523 
4524         if(level!=SOL_TCP)
4525                 return ip_getsockopt(sk,level,optname,optval,optlen);
4526                         
4527         switch(optname)
4528         {
4529                 case TCP_MAXSEG:
4530                         val=sk->user_mss;
4531                         break;
4532                 case TCP_NODELAY:
4533                         val=sk->nonagle;
4534                         break;
4535                 default:
4536                         return(-ENOPROTOOPT);
4537         }
4538         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4539         if(err)
4540                 return err;
4541         put_fs_long(sizeof(int),(unsigned long *) optlen);
4542 
4543         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4544         if(err)
4545                 return err;
4546         put_fs_long(val,(unsigned long *)optval);
4547 
4548         return(0);
4549 }       
4550 
4551 
4552 struct proto tcp_prot = {
4553         sock_wmalloc,
4554         sock_rmalloc,
4555         sock_wfree,
4556         sock_rfree,
4557         sock_rspace,
4558         sock_wspace,
4559         tcp_close,
4560         tcp_read,
4561         tcp_write,
4562         tcp_sendto,
4563         tcp_recvfrom,
4564         ip_build_header,
4565         tcp_connect,
4566         tcp_accept,
4567         ip_queue_xmit,
4568         tcp_retransmit,
4569         tcp_write_wakeup,
4570         tcp_read_wakeup,
4571         tcp_rcv,
4572         tcp_select,
4573         tcp_ioctl,
4574         NULL,
4575         tcp_shutdown,
4576         tcp_setsockopt,
4577         tcp_getsockopt,
4578         128,
4579         0,
4580         {NULL,},
4581         "TCP"
4582 };

/* [previous][next][first][last][top][bottom][index][help] */