root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_close_pending
  6. tcp_dequeue_established
  7. tcp_time_wait
  8. tcp_retransmit
  9. tcp_err
  10. tcp_readable
  11. tcp_select
  12. tcp_ioctl
  13. tcp_check
  14. tcp_send_check
  15. tcp_send_skb
  16. tcp_dequeue_partial
  17. tcp_send_partial
  18. tcp_enqueue_partial
  19. tcp_send_ack
  20. tcp_build_header
  21. tcp_write
  22. tcp_sendto
  23. tcp_read_wakeup
  24. cleanup_rbuf
  25. tcp_read_urg
  26. tcp_read
  27. tcp_shutdown
  28. tcp_recvfrom
  29. tcp_reset
  30. tcp_options
  31. default_mask
  32. tcp_init_seq
  33. tcp_conn_request
  34. tcp_close
  35. tcp_write_xmit
  36. tcp_ack
  37. tcp_data
  38. tcp_check_urg
  39. tcp_urg
  40. tcp_fin
  41. tcp_accept
  42. tcp_connect
  43. tcp_sequence
  44. tcp_clean_end
  45. tcp_rcv
  46. tcp_write_wakeup
  47. tcp_send_probe0
  48. tcp_setsockopt
  49. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
  83  *              Matt Dillon     :       Yet more small nasties remove from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *              Alan Cox        :       Reset on closedown bug.
  87  *      Peter De Schrijver      :       ENOTCONN check missing in tcp_sendto().
  88  *              Michael Pall    :       Handle select() after URG properly in all cases.
  89  *              Michael Pall    :       Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin).
  90  *              Michael Pall    :       Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now.
  91  *              Michael Pall    :       recv(...,MSG_OOB) never blocks in the BSD api.
  92  *              Alan Cox        :       Changed the semantics of sk->socket to 
  93  *                                      fix a race and a signal problem with
  94  *                                      accept() and async I/O.
  95  *              Alan Cox        :       Relaxed the rules on tcp_sendto().
  96  *              Yury Shevchuk   :       Really fixed accept() blocking problem.
  97  *              Craig I. Hagan  :       Allow for BSD compatible TIME_WAIT for
  98  *                                      clients/servers which listen in on
  99  *                                      fixed ports.
 100  *              Alan Cox        :       Cleaned the above up and shrank it to
 101  *                                      a sensible code size.
 102  *              Alan Cox        :       Self connect lockup fix.
 103  *              Alan Cox        :       No connect to multicast.
 104  *              Ross Biro       :       Close unaccepted children on master
 105  *                                      socket close.
 106  *              Alan Cox        :       Reset tracing code.
 107  *              Alan Cox        :       Spurious resets on shutdown.
 108  *
 109  *
 110  * To Fix:
 111  *                      Fast path the code. Two things here - fix the window calculation
 112  *              so it doesn't iterate over the queue, also spot packets with no funny
 113  *              options arriving in order and process directly.
 114  *
 115  *              This program is free software; you can redistribute it and/or
 116  *              modify it under the terms of the GNU General Public License
 117  *              as published by the Free Software Foundation; either version
 118  *              2 of the License, or(at your option) any later version.
 119  *
 120  * Description of States:
 121  *
 122  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 123  *
 124  *      TCP_SYN_RECV            received a connection request, sent ack,
 125  *                              waiting for final ack in three-way handshake.
 126  *
 127  *      TCP_ESTABLISHED         connection established
 128  *
 129  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 130  *                              transmission of remaining buffered data
 131  *
 132  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 133  *                              to shutdown
 134  *
 135  *      TCP_CLOSING             both sides have shutdown but we still have
 136  *                              data we have to finish sending
 137  *
 138  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 139  *                              closed, can only be entered from FIN_WAIT2
 140  *                              or CLOSING.  Required because the other end
 141  *                              may not have gotten our last ACK causing it
 142  *                              to retransmit the data packet (which we ignore)
 143  *
 144  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 145  *                              us to finish writing our data and to shutdown
 146  *                              (we have to close() to move on to LAST_ACK)
 147  *
 148  *      TCP_LAST_ACK            our side has shutdown after remote has
 149  *                              shutdown.  There may still be data in our
 150  *                              buffer that we have to finish sending
 151  *              
 152  *      TCP_CLOSE               socket is finished
 153  */
 154 #include <linux/types.h>
 155 #include <linux/sched.h>
 156 #include <linux/mm.h>
 157 #include <linux/string.h>
 158 #include <linux/socket.h>
 159 #include <linux/sockios.h>
 160 #include <linux/termios.h>
 161 #include <linux/in.h>
 162 #include <linux/fcntl.h>
 163 #include <linux/inet.h>
 164 #include <linux/netdevice.h>
 165 #include "snmp.h"
 166 #include "ip.h"
 167 #include "protocol.h"
 168 #include "icmp.h"
 169 #include "tcp.h"
 170 #include <linux/skbuff.h>
 171 #include "sock.h"
 172 #include "route.h"
 173 #include <linux/errno.h>
 174 #include <linux/timer.h>
 175 #include <asm/system.h>
 176 #include <asm/segment.h>
 177 #include <linux/mm.h>
 178 
 179 #undef TCP_FASTPATH
     /* TCP_FASTPATH is forced off above, so the counters declared under
        #ifdef TCP_FASTPATH further down are compiled out. */
 180 
     /* NOTE(review): SEQ_TICK and seq_offset are not referenced in the part
        of the file visible to this review; presumably they feed initial
        sequence number selection - confirm against tcp_init_seq(). */
 181 #define SEQ_TICK 3
 182 unsigned long seq_offset;
     /* TCP statistics counters; code in this file updates fields such as
        TcpCurrEstab, TcpAttemptFails and TcpOutSegs. */
 183 struct tcp_mib  tcp_statistics;
 184 
     /* Forward declaration: tcp_close_pending() calls tcp_close() ahead of
        its definition. */
 185 static void tcp_close(struct sock *sk, int timeout);
 186 
     /* Fast-path counters (hit/miss, judging by their names); only built
        when TCP_FASTPATH is defined. */
 187 #ifdef TCP_FASTPATH
 188 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 189 #endif
 190 
 191 
 192 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 193 {
 194         if (a < b) 
 195                 return(a);
 196         return(b);
 197 }
 198 
 199 #undef STATE_TRACE
 200 
 201 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 202 {
 203         if(sk->state==TCP_ESTABLISHED)
 204                 tcp_statistics.TcpCurrEstab--;
 205 #ifdef STATE_TRACE
 206         if(sk->debug)
 207                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 208 #endif  
 209         sk->state=state;
 210         if(state==TCP_ESTABLISHED)
 211                 tcp_statistics.TcpCurrEstab++;
 212 }
 213 
 214 /* This routine picks a TCP windows for a socket based on
 215    the following constraints
 216    
 217    1. The window can never be shrunk once it is offered (RFC 793)
 218    2. We limit memory per socket
 219    
 220    For now we use NET2E3's heuristic of offering half the memory
 221    we have handy. All is not as bad as this seems however because
 222    of two things. Firstly we will bin packets even within the window
 223    in order to get the data we are waiting for into the memory limit.
 224    Secondly we bin common duplicate forms at receive time
 225    
 226    Better heuristics welcome
 227 */
 228    
 229 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 230 {
 231         int new_window = sk->prot->rspace(sk);
 232         
 233         if(sk->window_clamp)
 234                 new_window=min(sk->window_clamp,new_window);
 235 /*
 236  * two things are going on here.  First, we don't ever offer a
 237  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 238  * receiver side of SWS as specified in RFC1122.
 239  * Second, we always give them at least the window they
 240  * had before, in order to avoid retracting window.  This
 241  * is technically allowed, but RFC1122 advises against it and
 242  * in practice it causes trouble.
 243  */
 244         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 245                 return(sk->window);
 246         return(new_window);
 247 }
 248 
 249 /*
 250  *      Find someone to 'accept'. Must be called with
 251  *      sk->inuse=1 or cli()
 252  */ 
 253 
 254 static struct sk_buff *tcp_find_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 255 {
 256         struct sk_buff *p=skb_peek(&s->receive_queue);
 257         if(p==NULL)
 258                 return NULL;
 259         do
 260         {
 261                 if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1)
 262                         return p;
 263                 p=p->next;
 264         }
 265         while(p!=skb_peek(&s->receive_queue));
 266         return NULL;
 267 }
 268 
 269 
 270 /* 
 271  *      This routine closes sockets which have been at least partially
 272  *      opened, but not yet accepted. Currently it is only called by
 273  *      tcp_close, and timeout mirrors the value there. 
 274  */
 275 
 276 static void tcp_close_pending (struct sock *sk, int timeout) 
     /* [previous][next][first][last][top][bottom][index][help] */
 277 {
 278         unsigned long flags;
 279         struct sk_buff *p, *old_p;
 280 
 281         save_flags(flags);
 282         cli(); 
 283         p=skb_peek(&sk->receive_queue);
 284 
 285         if(p==NULL) 
 286         {
 287                 restore_flags(flags);
 288                 return;
 289         }
 290 
 291         do
 292         {
 293                 tcp_close (p->sk, timeout);
 294                 skb_unlink (p);
 295                 old_p = p;
 296                 p=p->next;
 297                 kfree_skb(old_p, FREE_READ);
 298         }
 299         while(p!=skb_peek(&sk->receive_queue));
 300 
 301         restore_flags(flags);
 302         return;
 303 }
 304 
 305 static struct sk_buff *tcp_dequeue_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 306 {
 307         struct sk_buff *skb;
 308         unsigned long flags;
 309         save_flags(flags);
 310         cli(); 
 311         skb=tcp_find_established(s);
 312         if(skb!=NULL)
 313                 skb_unlink(skb);        /* Take it off the queue */
 314         restore_flags(flags);
 315         return skb;
 316 }
 317 
 318 
 319 /*
 320  *      Enter the time wait state. 
 321  */
 322 
 323 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 324 {
 325         tcp_set_state(sk,TCP_TIME_WAIT);
 326         sk->shutdown = SHUTDOWN_MASK;
 327         if (!sk->dead)
 328                 sk->state_change(sk);
 329         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 330 }
 331 
 332 /*
 333  *      A timer event has trigger a tcp retransmit timeout. The
 334  *      socket xmit queue is ready and set up to send. Because
 335  *      the ack receive code keeps the queue straight we do
 336  *      nothing clever here.
 337  */
 338 
 339 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 340 {
 341         if (all) 
 342         {
 343                 ip_retransmit(sk, all);
 344                 return;
 345         }
 346 
 347         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 348         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 349         sk->cong_count = 0;
 350 
 351         sk->cong_window = 1;
 352 
 353         /* Do the actual retransmit. */
 354         ip_retransmit(sk, all);
 355 }
 356 
 357 
 358 /*
 359  * This routine is called by the ICMP module when it gets some
 360  * sort of error condition.  If err < 0 then the socket should
 361  * be closed and the error returned to the user.  If err > 0
 362  * it's just the icmp type << 8 | icmp code.  After adjustment
 363  * header points to the first 8 bytes of the tcp header.  We need
 364  * to find the appropriate port.
 365  */
 366 
 367 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 368         unsigned long saddr, struct inet_protocol *protocol)
 369 {
 370         struct tcphdr *th;
 371         struct sock *sk;
 372         struct iphdr *iph=(struct iphdr *)header;
 373   
 374         header+=4*iph->ihl;
 375    
 376 
 377         th =(struct tcphdr *)header;
 378         sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
 379 
 380         if (sk == NULL) 
 381                 return;
 382   
 383         if(err<0)
 384         {
 385                 sk->err = -err;
 386                 sk->error_report(sk);
 387                 return;
 388         }
 389 
 390         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 391         {
 392                 /*
 393                  * FIXME:
 394                  * For now we will just trigger a linear backoff.
 395                  * The slow start code should cause a real backoff here.
 396                  */
 397                 if (sk->cong_window > 4)
 398                         sk->cong_window--;
 399                 return;
 400         }
 401 
 402 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 403 
 404         /*
 405          * If we've already connected we will keep trying
 406          * until we time out, or the user gives up.
 407          */
 408 
 409         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 410         {
 411                 if (sk->state == TCP_SYN_SENT) 
 412                 {
 413                         tcp_statistics.TcpAttemptFails++;
 414                         tcp_set_state(sk,TCP_CLOSE);
 415                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 416                 }
 417                 sk->err = icmp_err_convert[err & 0xff].errno;           
 418         }
 419         return;
 420 }
 421 
 422 
 423 /*
 424  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 425  *      in the received data queue (ie a frame missing that needs sending to us)
      *
      *      Returns the number of bytes counted. Returns 0 when sk is NULL or
      *      the receive queue is empty. The queue walk runs between cli() and
      *      restore_flags(). Debug output is printed when sk->debug is set.
      *
      *      NOTE(review): 'amount' is an unsigned long but the function returns
      *      int, so the value is converted on return.
 426  */
 427 
 428 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 429 {
 430         unsigned long counted;
 431         unsigned long amount;
 432         struct sk_buff *skb;
 433         int sum;
 434         unsigned long flags;
 435 
 436         if(sk && sk->debug)
 437                 printk("tcp_readable: %p - ",sk);
 438 
 439         save_flags(flags);
 440         cli();
 441         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 442         {
 443                 restore_flags(flags);
 444                 if(sk && sk->debug) 
 445                         printk("empty\n");
 446                 return(0);
 447         }
 448   
 449         counted = sk->copied_seq+1;     /* Where we are at the moment */
 450         amount = 0;
 451   
 452         /* Do until a push or until we are out of data. */
 453         do 
 454         {
 455                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 456                         break;
 457                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
                     /* A SYN is added to 'sum' here so that 'counted' advances
                        past it, then taken back out of 'amount' below. */
 458                 if (skb->h.th->syn)
 459                         sum++;
 460                 if (sum > 0) 
 461                 {                                       /* Add it up, move on */
 462                         amount += sum;
 463                         if (skb->h.th->syn) 
 464                                 amount--;
 465                         counted += sum;
 466                 }
 467                 /*
 468                  * Don't count urg data ... but do it in the right place!
 469                  * Consider: "old_data (ptr is here) URG PUSH data"
 470                  * The old code would stop at the first push because
 471                  * it counted the urg (amount==1) and then does amount--
 472                  * *after* the loop.  This means tcp_readable() always
 473                  * returned zero if any URG PUSH was in the queue, even
 474                  * though there was normal data available. If we subtract
 475                  * the urg data right here, we even get it to work for more
 476                  * than one URG PUSH skb without normal data.
 477                  * This means that select() finally works now with urg data
 478                  * in the queue.  Note that rlogin was never affected
 479                  * because it doesn't use select(); it uses two processes
 480                  * and a blocking read().  And the queue scan in tcp_read()
 481                  * was correct.  Mike <pall@rz.uni-karlsruhe.de>
 482                  */
                     /* NOTE(review): this decrement is unconditional; if 'amount'
                        is 0 here (urg set but nothing counted) the unsigned value
                        wraps - confirm that cannot happen. */
 483                 if (skb->h.th->urg)
 484                         amount--;       /* don't count urg data */
                     /* Stop at a pushed segment once some data has been counted. */
 485                 if (amount && skb->h.th->psh) break;
 486                 skb = skb->next;
 487         }
             /* The queue head itself marks the end of the list. */
 488         while(skb != (struct sk_buff *)&sk->receive_queue);
 489 
 490         restore_flags(flags);
 491         if(sk->debug)
 492                 printk("got %lu bytes.\n",amount);
 493         return(amount);
 494 }
 495 
 496 
 497 /*
 498  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 499  *      listening socket has a receive queue of sockets to accept.
      *
      *      sel_type is SEL_IN, SEL_OUT or SEL_EX; any other value yields 0.
      *      Each known type first registers on sk->sleep through select_wait().
      *      Returns 1 when the requested condition holds, otherwise 0.
      *
      *      sk->inuse is set on entry and release_sock() is called on every
      *      return path.
 500  */
 501 
 502 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 503 {
 504         sk->inuse = 1;
 505 
 506         switch(sel_type) 
 507         {
                     /* Readable: something acceptable or countable is queued,
                        an error is pending, or the receive side is shut down. */
 508                 case SEL_IN:
 509                         select_wait(sk->sleep, wait);
 510                         if (skb_peek(&sk->receive_queue) != NULL) 
 511                         {
 512                                 if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) 
 513                                 {
 514                                         release_sock(sk);
 515                                         return(1);
 516                                 }
 517                         }
 518                         if (sk->err != 0)       /* Receiver error */
 519                         {
 520                                 release_sock(sk);
 521                                 return(1);
 522                         }
 523                         if (sk->shutdown & RCV_SHUTDOWN) 
 524                         {
 525                                 release_sock(sk);
 526                                 return(1);
 527                         } 
 528                         release_sock(sk);
 529                         return(0);
                     /* Writable: never after SEND_SHUTDOWN; otherwise requires
                        write space of at least mtu+128+max_header and a state
                        other than SYN_RECV / SYN_SENT. */
 530                 case SEL_OUT:
 531                         select_wait(sk->sleep, wait);
 532                         if (sk->shutdown & SEND_SHUTDOWN) 
 533                         {
 534                                 /* FIXME: should this return an error? */
 535                                 release_sock(sk);
 536                                 return(0);
 537                         }
 538 
 539                         /*
 540                          * This is now right thanks to a small fix
 541                          * by Matt Dillon.
 542                          */
 543                         
 544                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 545                         {
                                     /* Note: sk->state is read after release_sock(). */
 546                                 release_sock(sk);
 547                                 /* This should cause connect to work ok. */
 548                                 if (sk->state == TCP_SYN_RECV ||
 549                                     sk->state == TCP_SYN_SENT) return(0);
 550                                 return(1);
 551                         }
 552                         release_sock(sk);
 553                         return(0);
                     /* Exceptional: a pending error or urgent data. */
 554                 case SEL_EX:
 555                         select_wait(sk->sleep,wait);
 556                         if (sk->err || sk->urg_data) 
 557                         {
 558                                 release_sock(sk);
 559                                 return(1);
 560                         }
 561                         release_sock(sk);
 562                         return(0);
 563         }
 564 
             /* Unknown sel_type. */
 565         release_sock(sk);
 566         return(0);
 567 }
 568 
 569 
 570 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 571 {
 572         int err;
 573         switch(cmd) 
 574         {
 575 
 576                 case TIOCINQ:
 577 #ifdef FIXME    /* FIXME: */
 578                 case FIONREAD:
 579 #endif
 580                 {
 581                         unsigned long amount;
 582 
 583                         if (sk->state == TCP_LISTEN) 
 584                                 return(-EINVAL);
 585 
 586                         sk->inuse = 1;
 587                         amount = tcp_readable(sk);
 588                         release_sock(sk);
 589                         err=verify_area(VERIFY_WRITE,(void *)arg,
 590                                                    sizeof(unsigned long));
 591                         if(err)
 592                                 return err;
 593                         put_fs_long(amount,(unsigned long *)arg);
 594                         return(0);
 595                 }
 596                 case SIOCATMARK:
 597                 {
 598                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 599 
 600                         err = verify_area(VERIFY_WRITE,(void *) arg,
 601                                                   sizeof(unsigned long));
 602                         if (err)
 603                                 return err;
 604                         put_fs_long(answ,(int *) arg);
 605                         return(0);
 606                 }
 607                 case TIOCOUTQ:
 608                 {
 609                         unsigned long amount;
 610 
 611                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 612                         amount = sk->prot->wspace(sk);
 613                         err=verify_area(VERIFY_WRITE,(void *)arg,
 614                                                    sizeof(unsigned long));
 615                         if(err)
 616                                 return err;
 617                         put_fs_long(amount,(unsigned long *)arg);
 618                         return(0);
 619                 }
 620                 default:
 621                         return(-EINVAL);
 622         }
 623 }
 624 
 625 
 626 /*
 627  *      This routine computes a TCP checksum. 
      *
      *      th      start of the data to sum (the TCP header)
      *      len     number of bytes to sum starting at th
      *      saddr   source address; 0 is replaced by ip_my_addr()
      *      daddr   destination address
      *
      *      The addresses, the length and IPPROTO_TCP are summed first, then
      *      the len bytes at th are added. Returns the complement of the sum
      *      folded to 16 bits. The arithmetic is i386 inline assembly.
 628  */
 629  
 630 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 631           unsigned long saddr, unsigned long daddr)
 632 {     
 633         unsigned long sum;
 634    
 635         if (saddr == 0) saddr = ip_my_addr();
 636 
 637 /*
 638  * stupid, gcc complains when I use just one __asm__ block,
 639  * something about too many reloads, but this is just two
 640  * instructions longer than what I want
 641  */
             /* First block: sum = daddr + saddr + ((ntohs(len) << 16) +
                IPPROTO_TCP*256), with each carry added back in (adcl). */
 642         __asm__("
 643             addl %%ecx, %%ebx
 644             adcl %%edx, %%ebx
 645             adcl $0, %%ebx
 646             "
 647         : "=b"(sum)
 648         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 649         : "bx", "cx", "dx" );
             /* Second block, continuing from 'sum' in ebx with len saved in edx:
                  label 1: 32-byte chunks, eight lodsl/adcl pairs per pass
                  label 2/3: the remaining whole 32-bit words (len & 28)
                  label 4: one 16-bit word when bit 1 of len is set
                  label 5: one final byte when bit 0 of len is set
                  label 6: add the upper 16 bits of ebx into bx, plus carry */
 650         __asm__("
 651             movl %%ecx, %%edx
 652             cld
 653             cmpl $32, %%ecx
 654             jb 2f
 655             shrl $5, %%ecx
 656             clc
 657 1:          lodsl
 658             adcl %%eax, %%ebx
 659             lodsl
 660             adcl %%eax, %%ebx
 661             lodsl
 662             adcl %%eax, %%ebx
 663             lodsl
 664             adcl %%eax, %%ebx
 665             lodsl
 666             adcl %%eax, %%ebx
 667             lodsl
 668             adcl %%eax, %%ebx
 669             lodsl
 670             adcl %%eax, %%ebx
 671             lodsl
 672             adcl %%eax, %%ebx
 673             loop 1b
 674             adcl $0, %%ebx
 675             movl %%edx, %%ecx
 676 2:          andl $28, %%ecx
 677             je 4f
 678             shrl $2, %%ecx
 679             clc
 680 3:          lodsl
 681             adcl %%eax, %%ebx
 682             loop 3b
 683             adcl $0, %%ebx
 684 4:          movl $0, %%eax
 685             testw $2, %%dx
 686             je 5f
 687             lodsw
 688             addl %%eax, %%ebx
 689             adcl $0, %%ebx
 690             movw $0, %%ax
 691 5:          test $1, %%edx
 692             je 6f
 693             lodsb
 694             addl %%eax, %%ebx
 695             adcl $0, %%ebx
 696 6:          movl %%ebx, %%eax
 697             shrl $16, %%eax
 698             addw %%ax, %%bx
 699             adcw $0, %%bx
 700             "
 701         : "=b"(sum)
 702         : "0"(sum), "c"(len), "S"(th)
 703         : "ax", "bx", "cx", "dx", "si" );
 704 
 705         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 706   
             /* Complement the sum and mask off the stale upper half. */
 707         return((~sum) & 0xffff);
 708 }
 709 
 710 
 711 
 712 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 713                 unsigned long daddr, int len, struct sock *sk)
 714 {
 715         th->check = 0;
 716         th->check = tcp_check(th, len, saddr, daddr);
 717         return;
 718 }
 719 
 720 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 721 {
 722         int size;
 723         struct tcphdr * th = skb->h.th;
 724 
 725         /* length of packet (not counting length of pre-tcp headers) */
 726         size = skb->len - ((unsigned char *) th - skb->data);
 727 
 728         /* sanity check it.. */
 729         if (size < sizeof(struct tcphdr) || size > skb->len) 
 730         {
 731                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 732                         skb, skb->data, th, skb->len);
 733                 kfree_skb(skb, FREE_WRITE);
 734                 return;
 735         }
 736 
 737         /* If we have queued a header size packet.. */
 738         if (size == sizeof(struct tcphdr)) 
 739         {
 740                 /* If its got a syn or fin its notionally included in the size..*/
 741                 if(!th->syn && !th->fin) 
 742                 {
 743                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 744                         kfree_skb(skb,FREE_WRITE);
 745                         return;
 746                 }
 747         }
 748 
 749         tcp_statistics.TcpOutSegs++;  
 750 
 751         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 752         if (after(skb->h.seq, sk->window_seq) ||
 753             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 754              sk->packets_out >= sk->cong_window) 
 755         {
 756                 /* checksum will be supplied by tcp_write_xmit.  So
 757                  * we shouldn't need to set it at all.  I'm being paranoid */
 758                 th->check = 0;
 759                 if (skb->next != NULL) 
 760                 {
 761                         printk("tcp_send_partial: next != NULL\n");
 762                         skb_unlink(skb);
 763                 }
 764                 skb_queue_tail(&sk->write_queue, skb);
 765                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 766                     sk->send_head == NULL &&
 767                     sk->ack_backlog == 0)
 768                         reset_timer(sk, TIME_PROBE0, sk->rto);
 769         } 
 770         else 
 771         {
 772                 th->ack_seq = ntohl(sk->acked_seq);
 773                 th->window = ntohs(tcp_select_window(sk));
 774 
 775                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 776 
 777                 sk->sent_seq = sk->write_seq;
 778                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 779         }
 780 }
 781 
 782 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 783 {
 784         struct sk_buff * skb;
 785         unsigned long flags;
 786 
 787         save_flags(flags);
 788         cli();
 789         skb = sk->partial;
 790         if (skb) {
 791                 sk->partial = NULL;
 792                 del_timer(&sk->partial_timer);
 793         }
 794         restore_flags(flags);
 795         return skb;
 796 }
 797 
 798 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 799 {
 800         struct sk_buff *skb;
 801 
 802         if (sk == NULL)
 803                 return;
 804         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 805                 tcp_send_skb(sk, skb);
 806 }
 807 
 808 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 809 {
 810         struct sk_buff * tmp;
 811         unsigned long flags;
 812 
 813         save_flags(flags);
 814         cli();
 815         tmp = sk->partial;
 816         if (tmp)
 817                 del_timer(&sk->partial_timer);
 818         sk->partial = skb;
 819         init_timer(&sk->partial_timer);
 820         sk->partial_timer.expires = HZ;
 821         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 822         sk->partial_timer.data = (unsigned long) sk;
 823         add_timer(&sk->partial_timer);
 824         restore_flags(flags);
 825         if (tmp)
 826                 tcp_send_skb(sk, tmp);
 827 }
 828 
 829 
 830 /*
 831  *      This routine sends an ack and also updates the window. 
 832  */
 833  
 834 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 835              struct sock *sk,
 836              struct tcphdr *th, unsigned long daddr)
 837 {
 838         struct sk_buff *buff;
 839         struct tcphdr *t1;
 840         struct device *dev = NULL;
 841         int tmp;
 842 
 843         if(sk->zapped)
 844                 return;         /* We have been reset, we may not send again */
 845         /*
 846          * We need to grab some memory, and put together an ack,
 847          * and then put it into the queue to be sent.
 848          */
 849 
 850         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 851         if (buff == NULL) 
 852         {
 853                 /* Force it to send an ack. */
 854                 sk->ack_backlog++;
 855                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 856                 {
 857                         reset_timer(sk, TIME_WRITE, 10);
 858                 }
 859                 return;
 860         }
 861 
 862         buff->len = sizeof(struct tcphdr);
 863         buff->sk = sk;
 864         buff->localroute = sk->localroute;
 865         t1 =(struct tcphdr *) buff->data;
 866 
 867         /* Put in the IP header and routing stuff. */
 868         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 869                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 870         if (tmp < 0) 
 871         {
 872                 buff->free=1;
 873                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 874                 return;
 875         }
 876         buff->len += tmp;
 877         t1 =(struct tcphdr *)((char *)t1 +tmp);
 878 
 879         /* FIXME: */
 880         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 881 
 882         /*
 883          *      Swap the send and the receive. 
 884          */
 885          
 886         t1->dest = th->source;
 887         t1->source = th->dest;
 888         t1->seq = ntohl(sequence);
 889         t1->ack = 1;
 890         sk->window = tcp_select_window(sk);
 891         t1->window = ntohs(sk->window);
 892         t1->res1 = 0;
 893         t1->res2 = 0;
 894         t1->rst = 0;
 895         t1->urg = 0;
 896         t1->syn = 0;
 897         t1->psh = 0;
 898         t1->fin = 0;
 899         if (ack == sk->acked_seq) 
 900         {
 901                 sk->ack_backlog = 0;
 902                 sk->bytes_rcv = 0;
 903                 sk->ack_timed = 0;
 904                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 905                                   && sk->timeout == TIME_WRITE) 
 906                 {
 907                         if(sk->keepopen) {
 908                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 909                         } else {
 910                                 delete_timer(sk);
 911                         }
 912                 }
 913         }
 914         t1->ack_seq = ntohl(ack);
 915         t1->doff = sizeof(*t1)/4;
 916         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 917         if (sk->debug)
 918                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 919         tcp_statistics.TcpOutSegs++;
 920         sk->prot->queue_xmit(sk, dev, buff, 1);
 921 }
 922 
 923 
 924 /* 
 925  *      This routine builds a generic TCP header. 
 926  */
 927  
 928 extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 929 {
 930 
 931         /* FIXME: want to get rid of this. */
 932         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 933         th->seq = htonl(sk->write_seq);
 934         th->psh =(push == 0) ? 1 : 0;
 935         th->doff = sizeof(*th)/4;
 936         th->ack = 1;
 937         th->fin = 0;
 938         sk->ack_backlog = 0;
 939         sk->bytes_rcv = 0;
 940         sk->ack_timed = 0;
 941         th->ack_seq = htonl(sk->acked_seq);
 942         sk->window = tcp_select_window(sk);
 943         th->window = htons(sk->window);
 944 
 945         return(sizeof(*th));
 946 }
 947 
 948 /*
 949  *      This routine copies from a user buffer into a socket,
 950  *      and starts the transmit system.
 951  */
 952 
 953 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 954           int len, int nonblock, unsigned flags)
 955 {
 956         int copied = 0;
 957         int copy;
 958         int tmp;
 959         struct sk_buff *skb;
 960         struct sk_buff *send_tmp;
 961         unsigned char *buff;
 962         struct proto *prot;
 963         struct device *dev = NULL;
 964 
 965         sk->inuse=1;
 966         prot = sk->prot;
 967         while(len > 0) 
 968         {
 969                 if (sk->err) 
 970                 {                       /* Stop on an error */
 971                         release_sock(sk);
 972                         if (copied) 
 973                                 return(copied);
 974                         tmp = -sk->err;
 975                         sk->err = 0;
 976                         return(tmp);
 977                 }
 978 
 979         /*
 980          *      First thing we do is make sure that we are established. 
 981          */
 982         
 983                 if (sk->shutdown & SEND_SHUTDOWN) 
 984                 {
 985                         release_sock(sk);
 986                         sk->err = EPIPE;
 987                         if (copied) 
 988                                 return(copied);
 989                         sk->err = 0;
 990                         return(-EPIPE);
 991                 }
 992 
 993 
 994         /* 
 995          *      Wait for a connection to finish.
 996          */
 997         
 998                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 999                 {
1000                         if (sk->err) 
1001                         {
1002                                 release_sock(sk);
1003                                 if (copied) 
1004                                         return(copied);
1005                                 tmp = -sk->err;
1006                                 sk->err = 0;
1007                                 return(tmp);
1008                         }
1009 
1010                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
1011                         {
1012                                 release_sock(sk);
1013                                 if (copied) 
1014                                         return(copied);
1015 
1016                                 if (sk->err) 
1017                                 {
1018                                         tmp = -sk->err;
1019                                         sk->err = 0;
1020                                         return(tmp);
1021                                 }
1022 
1023                                 if (sk->keepopen) 
1024                                 {
1025                                         send_sig(SIGPIPE, current, 0);
1026                                 }
1027                                 return(-EPIPE);
1028                         }
1029 
1030                         if (nonblock || copied) 
1031                         {
1032                                 release_sock(sk);
1033                                 if (copied) 
1034                                         return(copied);
1035                                 return(-EAGAIN);
1036                         }
1037 
1038                         release_sock(sk);
1039                         cli();
1040                 
1041                         if (sk->state != TCP_ESTABLISHED &&
1042                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
1043                         {
1044                                 interruptible_sleep_on(sk->sleep);
1045                                 if (current->signal & ~current->blocked) 
1046                                 {
1047                                         sti();
1048                                         if (copied) 
1049                                                 return(copied);
1050                                         return(-ERESTARTSYS);
1051                                 }
1052                         }
1053                         sk->inuse = 1;
1054                         sti();
1055                 }
1056 
1057         /*
1058          * The following code can result in copy <= if sk->mss is ever
1059          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1060          * sk->mtu is constant once SYN processing is finished.  I.e. we
1061          * had better not get here until we've seen his SYN and at least one
1062          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1063          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1064          * non-decreasing.  Note that any ioctl to set user_mss must be done
1065          * before the exchange of SYN's.  If the initial ack from the other
1066          * end has a window of 0, max_window and thus mss will both be 0.
1067          */
1068 
1069         /* 
1070          *      Now we need to check if we have a half built packet. 
1071          */
1072 
1073                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1074                 {
1075                         int hdrlen;
1076 
1077                          /* IP header + TCP header */
1078                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1079                                  + sizeof(struct tcphdr);
1080         
1081                         /* Add more stuff to the end of skb->len */
1082                         if (!(flags & MSG_OOB)) 
1083                         {
1084                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1085                                 /* FIXME: this is really a bug. */
1086                                 if (copy <= 0) 
1087                                 {
1088                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1089                                         copy = 0;
1090                                 }
1091           
1092                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1093                                 skb->len += copy;
1094                                 from += copy;
1095                                 copied += copy;
1096                                 len -= copy;
1097                                 sk->write_seq += copy;
1098                         }
1099                         if ((skb->len - hdrlen) >= sk->mss ||
1100                                 (flags & MSG_OOB) || !sk->packets_out)
1101                                 tcp_send_skb(sk, skb);
1102                         else
1103                                 tcp_enqueue_partial(skb, sk);
1104                         continue;
1105                 }
1106 
1107         /*
1108          * We also need to worry about the window.
1109          * If window < 1/2 the maximum window we've seen from this
1110          *   host, don't use it.  This is sender side
1111          *   silly window prevention, as specified in RFC1122.
1112          *   (Note that this is different than earlier versions of
1113          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1114          *   use the whole MSS.  Since the results in the right
1115          *   edge of the packet being outside the window, it will
1116          *   be queued for later rather than sent.
1117          */
1118 
1119                 copy = sk->window_seq - sk->write_seq;
1120                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1121                         copy = sk->mss;
1122                 if (copy > len)
1123                         copy = len;
1124 
1125         /*
1126          *      We should really check the window here also. 
1127          */
1128          
1129                 send_tmp = NULL;
1130                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1131                 {
1132                         /*
1133                          *      We will release the socket incase we sleep here. 
1134                          */
1135                         release_sock(sk);
1136                         /*
1137                          *      NB: following must be mtu, because mss can be increased.
1138                          *      mss is always <= mtu 
1139                          */
1140                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1141                         sk->inuse = 1;
1142                         send_tmp = skb;
1143                 } 
1144                 else 
1145                 {
1146                         /*
1147                          *      We will release the socket incase we sleep here. 
1148                          */
1149                         release_sock(sk);
1150                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1151                         sk->inuse = 1;
1152                 }
1153 
1154                 /*
1155                  *      If we didn't get any memory, we need to sleep. 
1156                  */
1157 
1158                 if (skb == NULL) 
1159                 {
1160                         if (nonblock) 
1161                         {
1162                                 release_sock(sk);
1163                                 if (copied) 
1164                                         return(copied);
1165                                 return(-EAGAIN);
1166                         }
1167 
1168                         /*
1169                          *      FIXME: here is another race condition. 
1170                          */
1171 
1172                         tmp = sk->wmem_alloc;
1173                         release_sock(sk);
1174                         cli();
1175                         /*
1176                          *      Again we will try to avoid it. 
1177                          */
1178                         if (tmp <= sk->wmem_alloc &&
1179                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1180                                 && sk->err == 0) 
1181                         {
1182                                 interruptible_sleep_on(sk->sleep);
1183                                 if (current->signal & ~current->blocked) 
1184                                 {
1185                                         sti();
1186                                         if (copied) 
1187                                                 return(copied);
1188                                         return(-ERESTARTSYS);
1189                                 }
1190                         }
1191                         sk->inuse = 1;
1192                         sti();
1193                         continue;
1194                 }
1195 
1196                 skb->len = 0;
1197                 skb->sk = sk;
1198                 skb->free = 0;
1199                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1200         
1201                 buff = skb->data;
1202         
1203                 /*
1204                  * FIXME: we need to optimize this.
1205                  * Perhaps some hints here would be good.
1206                  */
1207                 
1208                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1209                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1210                 if (tmp < 0 ) 
1211                 {
1212                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1213                         release_sock(sk);
1214                         if (copied) 
1215                                 return(copied);
1216                         return(tmp);
1217                 }
1218                 skb->len += tmp;
1219                 skb->dev = dev;
1220                 buff += tmp;
1221                 skb->h.th =(struct tcphdr *) buff;
1222                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1223                 if (tmp < 0) 
1224                 {
1225                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1226                         release_sock(sk);
1227                         if (copied) 
1228                                 return(copied);
1229                         return(tmp);
1230                 }
1231 
1232                 if (flags & MSG_OOB) 
1233                 {
1234                         ((struct tcphdr *)buff)->urg = 1;
1235                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1236                 }
1237                 skb->len += tmp;
1238                 memcpy_fromfs(buff+tmp, from, copy);
1239 
1240                 from += copy;
1241                 copied += copy;
1242                 len -= copy;
1243                 skb->len += copy;
1244                 skb->free = 0;
1245                 sk->write_seq += copy;
1246         
1247                 if (send_tmp != NULL && sk->packets_out) 
1248                 {
1249                         tcp_enqueue_partial(send_tmp, sk);
1250                         continue;
1251                 }
1252                 tcp_send_skb(sk, skb);
1253         }
1254         sk->err = 0;
1255 
1256 /*
1257  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1258  *      interactive fast network servers. It's meant to be on and
1259  *      it really improves the throughput though not the echo time
1260  *      on my slow slip link - Alan
1261  */
1262 
1263 /*
1264  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1265  */
1266  
1267         if(sk->partial && ((!sk->packets_out) 
1268      /* If not nagling we can send on the before case too.. */
1269               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1270         ))
1271                 tcp_send_partial(sk);
1272 
1273         release_sock(sk);
1274         return(copied);
1275 }
1276 
1277 
1278 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1279            int len, int nonblock, unsigned flags,
1280            struct sockaddr_in *addr, int addr_len)
1281 {
1282         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1283                 return -EINVAL;
1284         if (sk->state == TCP_CLOSE)
1285                 return -ENOTCONN;
1286         if (addr_len < sizeof(*addr))
1287                 return -EINVAL;
1288         if (addr->sin_family && addr->sin_family != AF_INET) 
1289                 return -EINVAL;
1290         if (addr->sin_port != sk->dummy_th.dest) 
1291                 return -EISCONN;
1292         if (addr->sin_addr.s_addr != sk->daddr) 
1293                 return -EISCONN;
1294         return tcp_write(sk, from, len, nonblock, flags);
1295 }
1296 
1297 
1298 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1299 {
1300         int tmp;
1301         struct device *dev = NULL;
1302         struct tcphdr *t1;
1303         struct sk_buff *buff;
1304 
1305         if (!sk->ack_backlog) 
1306                 return;
1307 
1308         /*
1309          * FIXME: we need to put code here to prevent this routine from
1310          * being called.  Being called once in a while is ok, so only check
1311          * if this is the second time in a row.
1312          */
1313 
1314         /*
1315          * We need to grab some memory, and put together an ack,
1316          * and then put it into the queue to be sent.
1317          */
1318 
1319         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1320         if (buff == NULL) 
1321         {
1322                 /* Try again real soon. */
1323                 reset_timer(sk, TIME_WRITE, 10);
1324                 return;
1325         }
1326 
1327         buff->len = sizeof(struct tcphdr);
1328         buff->sk = sk;
1329         buff->localroute = sk->localroute;
1330         
1331         /*
1332          *      Put in the IP header and routing stuff. 
1333          */
1334 
1335         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1336                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1337         if (tmp < 0) 
1338         {
1339                 buff->free=1;
1340                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1341                 return;
1342         }
1343 
1344         buff->len += tmp;
1345         t1 =(struct tcphdr *)(buff->data +tmp);
1346 
1347         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1348         t1->seq = htonl(sk->sent_seq);
1349         t1->ack = 1;
1350         t1->res1 = 0;
1351         t1->res2 = 0;
1352         t1->rst = 0;
1353         t1->urg = 0;
1354         t1->syn = 0;
1355         t1->psh = 0;
1356         sk->ack_backlog = 0;
1357         sk->bytes_rcv = 0;
1358         sk->window = tcp_select_window(sk);
1359         t1->window = ntohs(sk->window);
1360         t1->ack_seq = ntohl(sk->acked_seq);
1361         t1->doff = sizeof(*t1)/4;
1362         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1363         sk->prot->queue_xmit(sk, dev, buff, 1);
1364         tcp_statistics.TcpOutSegs++;
1365 }
1366 
1367 
1368 /*
1369  *      FIXME:
1370  *      This routine frees used buffers.
1371  *      It should consider sending an ACK to let the
1372  *      other end know we now have a bigger window.
1373  */
1374 
1375 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1376 {
1377         unsigned long flags;
1378         unsigned long left;
1379         struct sk_buff *skb;
1380         unsigned long rspace;
1381 
1382         if(sk->debug)
1383                 printk("cleaning rbuf for sk=%p\n", sk);
1384   
1385         save_flags(flags);
1386         cli();
1387   
1388         left = sk->prot->rspace(sk);
1389  
1390         /*
1391          * We have to loop through all the buffer headers,
1392          * and try to free up all the space we can.
1393          */
1394 
1395         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1396         {
1397                 if (!skb->used) 
1398                         break;
1399                 skb_unlink(skb);
1400                 skb->sk = sk;
1401                 kfree_skb(skb, FREE_READ);
1402         }
1403 
1404         restore_flags(flags);
1405 
1406         /*
1407          * FIXME:
1408          * At this point we should send an ack if the difference
1409          * in the window, and the amount of space is bigger than
1410          * TCP_WINDOW_DIFF.
1411          */
1412 
1413         if(sk->debug)
1414                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1415                                             left);
1416         if ((rspace=sk->prot->rspace(sk)) != left) 
1417         {
1418                 /*
1419                  * This area has caused the most trouble.  The current strategy
1420                  * is to simply do nothing if the other end has room to send at
1421                  * least 3 full packets, because the ack from those will auto-
1422                  * matically update the window.  If the other end doesn't think
1423                  * we have much space left, but we have room for at least 1 more
1424                  * complete packet than it thinks we do, we will send an ack
1425                  * immediately.  Otherwise we will wait up to .5 seconds in case
1426                  * the user reads some more.
1427                  */
1428                 sk->ack_backlog++;
1429         /*
1430          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1431          * if the other end is offering a window smaller than the agreed on MSS
1432          * (called sk->mtu here).  In theory there's no connection between send
1433          * and receive, and so no reason to think that they're going to send
1434          * small packets.  For the moment I'm using the hack of reducing the mss
1435          * only on the send side, so I'm putting mtu here.
1436          */
1437 
1438                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1439                 {
1440                         /* Send an ack right now. */
1441                         tcp_read_wakeup(sk);
1442                 } 
1443                 else 
1444                 {
1445                         /* Force it to send an ack soon. */
1446                         int was_active = del_timer(&sk->timer);
1447                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1448                         {
1449                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1450                         } 
1451                         else
1452                                 add_timer(&sk->timer);
1453                 }
1454         }
1455 } 
1456 
1457 
1458 /*
1459  *      Handle reading urgent data. 
1460  */
1461  
1462 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1463              unsigned char *to, int len, unsigned flags)
1464 {
1465         if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1466                 return -EINVAL;
1467         if (sk->err) 
1468         {
1469                 int tmp = -sk->err;
1470                 sk->err = 0;
1471                 return tmp;
1472         }
1473 
1474         if (sk->state == TCP_CLOSE || sk->done) 
1475         {
1476                 if (!sk->done) {
1477                         sk->done = 1;
1478                         return 0;
1479                 }
1480                 return -ENOTCONN;
1481         }
1482 
1483         if (sk->shutdown & RCV_SHUTDOWN) 
1484         {
1485                 sk->done = 1;
1486                 return 0;
1487         }
1488         sk->inuse = 1;
1489         if (sk->urg_data & URG_VALID) 
1490         {
1491                 char c = sk->urg_data;
1492                 if (!(flags & MSG_PEEK))
1493                         sk->urg_data = URG_READ;
1494                 put_fs_byte(c, to);
1495                 release_sock(sk);
1496                 return 1;
1497         }
1498         release_sock(sk);
1499         
1500         /*
1501          * Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
1502          * the available implementations agree in this case:
1503          * this call should never block, independent of the
1504          * blocking state of the socket.
1505          * Mike <pall@rz.uni-karlsruhe.de>
1506          */
1507         return -EAGAIN;
1508 }
1509 
1510 
1511 /*
1512  *      This routine copies from a sock struct into the user buffer. 
      *
      *      Up to 'len' bytes are taken from the segments queued on
      *      sk->receive_queue and copied to 'to' with memcpy_tofs().
      *
      *      flags & MSG_OOB   the request is handed to tcp_read_urg().
      *      flags & MSG_PEEK  a local copy of sk->copied_seq is advanced
      *                        instead of the real one and no segment is
      *                        marked used, so the data stays queued.
      *      nonblock          return -EAGAIN instead of sleeping when no
      *                        data is available.
      *
      *      Returns the number of bytes copied.  When nothing was copied
      *      the result is 0 (socket closed for the first time, or receive
      *      side shut down) or a negative errno: -ENOTCONN (listening
      *      socket, or closed and already reported), -sk->err (pending
      *      error, cleared), -EAGAIN, or -ERESTARTSYS (unblocked signal
      *      pending after sleeping).
1513  */
1514  
1515 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1516         int len, int nonblock, unsigned flags)
1517 {
1518         struct wait_queue wait = { current, NULL };
1519         int copied = 0;
1520         unsigned long peek_seq;
1521         unsigned long *seq;
1522         unsigned long used;
1523 
1524         /* This error should be checked. */
1525         if (sk->state == TCP_LISTEN)
1526                 return -ENOTCONN;
1527 
1528         /* Urgent data needs to be handled specially. */
1529         if (flags & MSG_OOB)
1530                 return tcp_read_urg(sk, nonblock, to, len, flags);
1531 
             /*
              * 'seq' points at the read position this call advances: the
              * socket's own copied_seq normally, a throw-away copy when
              * peeking.  The next byte wanted has sequence number 1+*seq.
              */
1532         peek_seq = sk->copied_seq;
1533         seq = &sk->copied_seq;
1534         if (flags & MSG_PEEK)
1535                 seq = &peek_seq;
1536 
             /* Queue ourselves on the socket and mark it in use for the loop. */
1537         add_wait_queue(sk->sleep, &wait);
1538         sk->inuse = 1;
1539         while (len > 0) 
1540         {
1541                 struct sk_buff * skb;
1542                 unsigned long offset;
1543         
1544                 /*
1545                  * are we at urgent data? Stop if we have read anything.
1546                  */
1547                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1548                         break;
1549 
                     /*
                      * Set the sleeping state before looking at the queue;
                      * schedule() below is only reached if nothing is found.
                      * NOTE(review): presumably this ordering avoids losing a
                      * wakeup that arrives during the scan -- confirm.
                      */
1550                 current->state = TASK_INTERRUPTIBLE;
1551 
                     /*
                      * Walk the receive queue for the segment holding byte
                      * 1+*seq.  The walk stops at a segment that starts after
                      * that byte.  Segments lying wholly before it are marked
                      * used unless we are peeking.
                      */
1552                 skb = skb_peek(&sk->receive_queue);
1553                 do 
1554                 {
1555                         if (!skb)
1556                                 break;
1557                         if (before(1+*seq, skb->h.th->seq))
1558                                 break;
1559                         offset = 1 + *seq - skb->h.th->seq;
                             /* a SYN takes a sequence number but no data byte */
1560                         if (skb->h.th->syn)
1561                                 offset--;
1562                         if (offset < skb->len)
1563                                 goto found_ok_skb;
1564                         if (!(flags & MSG_PEEK))
1565                                 skb->used = 1;
1566                         skb = skb->next;
1567                 }
1568                 while (skb != (struct sk_buff *)&sk->receive_queue);
1569 
                     /*
                      * No data at the read position.  Return what we already
                      * have; the error and end-of-stream checks below only
                      * apply when nothing has been copied.
                      */
1570                 if (copied)
1571                         break;
1572 
1573                 if (sk->err) 
1574                 {
1575                         copied = -sk->err;
1576                         sk->err = 0;
1577                         break;
1578                 }
1579 
                     /* Closed: report 0 once, then -ENOTCONN on later calls. */
1580                 if (sk->state == TCP_CLOSE) 
1581                 {
1582                         if (!sk->done) 
1583                         {
1584                                 sk->done = 1;
1585                                 break;
1586                         }
1587                         copied = -ENOTCONN;
1588                         break;
1589                 }
1590 
1591                 if (sk->shutdown & RCV_SHUTDOWN) 
1592                 {
1593                         sk->done = 1;
1594                         break;
1595                 }
1596                         
1597                 if (nonblock) 
1598                 {
1599                         copied = -EAGAIN;
1600                         break;
1601                 }
1602 
                     /*
                      * Sleep until woken: tidy the receive buffers, let go of
                      * the socket while we are off the CPU, then mark it in
                      * use again before looking at it.
                      */
1603                 cleanup_rbuf(sk);
1604                 release_sock(sk);
1605                 schedule();
1606                 sk->inuse = 1;
1607 
                     /* woken by an unblocked signal rather than by data */
1608                 if (current->signal & ~current->blocked) 
1609                 {
1610                         copied = -ERESTARTSYS;
1611                         break;
1612                 }
1613                 continue;
1614 
1615         found_ok_skb:
1616                 /* Ok so how much can we use ? */
1617                 used = skb->len - offset;
1618                 if (len < used)
1619                         used = len;
1620                 /* do we have urgent data here? */
                     /*
                      * urg_offset is the distance from the read position to
                      * the urgent byte.  If that byte is the very next one and
                      * urgent data is not delivered inline, step over it.  If
                      * it lies further on within this copy, shorten the copy
                      * so that it ends just before the urgent byte.
                      */
1621                 if (sk->urg_data) 
1622                 {
1623                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1624                         if (urg_offset < used) 
1625                         {
1626                                 if (!urg_offset) 
1627                                 {
1628                                         if (!sk->urginline) 
1629                                         {
1630                                                 ++*seq;
1631                                                 offset++;
1632                                                 used--;
1633                                         }
1634                                 }
1635                                 else
1636                                         used = urg_offset;
1637                         }
1638                 }
1639                 /* Copy it */
                     /* payload starts doff*4 bytes past the start of the TCP header */
1640                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1641                         skb->h.th->doff*4 + offset, used);
1642                 copied += used;
1643                 len -= used;
1644                 to += used;
1645                 *seq += used;
                     /*
                      * Tested against the socket's real read position, not
                      * *seq, so a peek past the urgent byte does not clear it.
                      */
1646                 if (after(sk->copied_seq+1,sk->urg_seq))
1647                         sk->urg_data = 0;
                     /* segment fully consumed by a real (non-peek) read */
1648                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1649                         skb->used = 1;
1650         }
1651         remove_wait_queue(sk->sleep, &wait);
1652         current->state = TASK_RUNNING;
1653 
1654         /* Clean up data we have read: This will do ACK frames */
1655         cleanup_rbuf(sk);
1656         release_sock(sk);
1657         return copied;
1658 }
1659 
1660  
1661 /*
1662  *      Shutdown the sending side of a connection.
      *
      *      Does nothing unless 'how' includes SEND_SHUTDOWN, or if the
      *      socket is already in a state where a FIN has been sent.
      *      Otherwise it marks the socket SEND_SHUTDOWN, pushes out any
      *      partial packet, builds a FIN|ACK segment and either appends
      *      it to the write queue (if data is still queued) or sends it
      *      at once.  The state then moves ESTABLISHED -> FIN_WAIT1,
      *      CLOSE_WAIT -> LAST_ACK, anything else -> FIN_WAIT2.
      *
      *      If the buffer allocation fails the function returns with
      *      SEND_SHUTDOWN set but no FIN sent and the state unchanged.
      *      If the header cannot be built the buffer is freed and the
      *      state is advanced anyway, as if the FIN had been lost.
1663  */
1664 
1665 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1666 {
1667         struct sk_buff *buff;
1668         struct tcphdr *t1, *th;
1669         struct proto *prot;
1670         int tmp;
1671         struct device *dev = NULL;
1672 
1673         /*
1674          * We need to grab some memory, and put together a FIN,
1675          * and then put it into the queue to be sent.
1676          * FIXME:
1677          *
1678          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1679          *      Most of this is guesswork, so maybe it will work...
1680          */
1681 
1682         if (!(how & SEND_SHUTDOWN)) 
1683                 return;
1684          
1685         /*
1686          *      If we've already sent a FIN, return. 
1687          */
1688          
1689         if (sk->state == TCP_FIN_WAIT1 ||
1690             sk->state == TCP_FIN_WAIT2 ||
1691             sk->state == TCP_CLOSING ||
1692             sk->state == TCP_LAST_ACK ||
1693             sk->state == TCP_TIME_WAIT
1694         ) 
1695         {
1696                 return;
1697         }
1698         sk->inuse = 1;
1699 
1700         /*
1701          * flag that the sender has shutdown
1702          */
1703 
1704         sk->shutdown |= SEND_SHUTDOWN;
1705 
1706         /*
1707          *  Clear out any half completed packets. 
1708          */
1709 
1710         if (sk->partial)
1711                 tcp_send_partial(sk);
1712 
1713         prot =(struct proto *)sk->prot;
1714         th =(struct tcphdr *)&sk->dummy_th;
             /*
              * The socket is released across the GFP_KERNEL allocation and
              * only marked in use again if the allocation succeeded, so
              * the failure return below needs no release_sock().
              */
1715         release_sock(sk); /* incase the malloc sleeps. */
1716         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1717         if (buff == NULL)
1718                 return;
1719         sk->inuse = 1;
1720 
1721         buff->sk = sk;
1722         buff->len = sizeof(*t1);
1723         buff->localroute = sk->localroute;
1724         t1 =(struct tcphdr *) buff->data;
1725 
1726         /*
1727          *      Put in the IP header and routing stuff. 
1728          */
1729 
1730         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1731                            IPPROTO_TCP, sk->opt,
1732                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1733         if (tmp < 0) 
1734         {
1735                 /*
1736                  *      Finish anyway, treat this as a send that got lost. 
1737                  *
1738                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1739                  *      written data to be completely acknowledged along
1740                  *      with an acknowledge to our FIN.
1741                  *
1742                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1743                  *      connection established.
1744                  */
1745                 buff->free=1;
1746                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1747 
1748                 if (sk->state == TCP_ESTABLISHED)
1749                         tcp_set_state(sk,TCP_FIN_WAIT1);
1750                 else if(sk->state == TCP_CLOSE_WAIT)
1751                         tcp_set_state(sk,TCP_LAST_ACK);
1752                 else
1753                         tcp_set_state(sk,TCP_FIN_WAIT2);
1754 
1755                 release_sock(sk);
1756                 return;
1757         }
1758 
             /*
              * build_header returned the number of bytes it wrote; the TCP
              * header goes directly after them.  Start from the socket's
              * template header and fill in the FIN-specific fields.
              */
1759         t1 =(struct tcphdr *)((char *)t1 +tmp);
1760         buff->len += tmp;
1761         buff->dev = dev;
1762         memcpy(t1, th, sizeof(*t1));
             /*
              * The FIN uses the current write_seq, which is then advanced
              * by one; buff->h.seq records the value after the increment.
              */
1763         t1->seq = ntohl(sk->write_seq);
1764         sk->write_seq++;
1765         buff->h.seq = sk->write_seq;
1766         t1->ack = 1;
1767         t1->ack_seq = ntohl(sk->acked_seq);
1768         t1->window = ntohs(sk->window=tcp_select_window(sk));
1769         t1->fin = 1;
1770         t1->rst = 0;
1771         t1->doff = sizeof(*t1)/4;
1772         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1773 
1774         /*
1775          * If there is data in the write queue, the fin must be appended to
1776          * the write queue.
1777          */
1778         
1779         if (skb_peek(&sk->write_queue) != NULL) 
1780         {
1781                 buff->free=0;
                     /* a fresh buffer should not be on any list; unlink if it is */
1782                 if (buff->next != NULL) 
1783                 {
1784                         printk("tcp_shutdown: next != NULL\n");
1785                         skb_unlink(buff);
1786                 }
1787                 skb_queue_tail(&sk->write_queue, buff);
1788         } 
1789         else 
1790         {
                     /* nothing queued ahead of it: transmit the FIN now */
1791                 sk->sent_seq = sk->write_seq;
1792                 sk->prot->queue_xmit(sk, dev, buff, 0);
1793         }
1794 
             /* same state transitions as the header-failure path above */
1795         if (sk->state == TCP_ESTABLISHED) 
1796                 tcp_set_state(sk,TCP_FIN_WAIT1);
1797         else if (sk->state == TCP_CLOSE_WAIT)
1798                 tcp_set_state(sk,TCP_LAST_ACK);
1799         else
1800                 tcp_set_state(sk,TCP_FIN_WAIT2);
1801 
1802         release_sock(sk);
1803 }
1804 
1805 
1806 static int
1807 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1808              int to_len, int nonblock, unsigned flags,
1809              struct sockaddr_in *addr, int *addr_len)
1810 {
1811         int result;
1812   
1813         /* 
1814          *      Have to check these first unlike the old code. If 
1815          *      we check them after we lose data on an error
1816          *      which is wrong 
1817          */
1818 
1819         if(addr_len)
1820                 *addr_len = sizeof(*addr);
1821         result=tcp_read(sk, to, to_len, nonblock, flags);
1822 
1823         if (result < 0) 
1824                 return(result);
1825   
1826         if(addr)
1827         {
1828                 addr->sin_family = AF_INET;
1829                 addr->sin_port = sk->dummy_th.dest;
1830                 addr->sin_addr.s_addr = sk->daddr;
1831         }
1832         return(result);
1833 }
1834 
1835 
1836 /*
1837  *      This routine will send an RST to the other tcp. 
1838  */
1839  
1840 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1841           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1842 {
1843         struct sk_buff *buff;
1844         struct tcphdr *t1;
1845         int tmp;
1846         struct device *ndev=NULL;
1847   
1848 /*
1849  * We need to grab some memory, and put together an RST,
1850  * and then put it into the queue to be sent.
1851  */
1852 
1853         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1854         if (buff == NULL) 
1855                 return;
1856 
1857         buff->len = sizeof(*t1);
1858         buff->sk = NULL;
1859         buff->dev = dev;
1860         buff->localroute = 0;
1861 
1862         t1 =(struct tcphdr *) buff->data;
1863 
1864         /*
1865          *      Put in the IP header and routing stuff. 
1866          */
1867 
1868         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1869                            sizeof(struct tcphdr),tos,ttl);
1870         if (tmp < 0) 
1871         {
1872                 buff->free = 1;
1873                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1874                 return;
1875         }
1876 
1877         t1 =(struct tcphdr *)((char *)t1 +tmp);
1878         buff->len += tmp;
1879         memcpy(t1, th, sizeof(*t1));
1880 
1881         /*
1882          *      Swap the send and the receive. 
1883          */
1884 
1885         t1->dest = th->source;
1886         t1->source = th->dest;
1887         t1->rst = 1;  
1888         t1->window = 0;
1889   
1890         if(th->ack)
1891         {
1892                 t1->ack = 0;
1893                 t1->seq = th->ack_seq;
1894                 t1->ack_seq = 0;
1895         }
1896         else
1897         {
1898                 t1->ack = 1;
1899                 if(!th->syn)
1900                         t1->ack_seq=htonl(th->seq);
1901                 else
1902                         t1->ack_seq=htonl(th->seq+1);
1903                 t1->seq=0;
1904         }
1905 
1906         t1->syn = 0;
1907         t1->urg = 0;
1908         t1->fin = 0;
1909         t1->psh = 0;
1910         t1->doff = sizeof(*t1)/4;
1911         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1912         prot->queue_xmit(NULL, ndev, buff, 1);
1913         tcp_statistics.TcpOutSegs++;
1914 }
1915 
1916 
1917 /*
1918  *      Look for tcp options. Parses everything but only knows about MSS.
1919  *      This routine is always called with the packet containing the SYN.
1920  *      However it may also be called with the ack to the SYN.  So you
1921  *      can't assume this is always the SYN.  It's always called after
1922  *      we have set up sk->mtu to our own MTU.
1923  */
1924  
1925 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1926 {
1927         unsigned char *ptr;
1928         int length=(th->doff*4)-sizeof(struct tcphdr);
1929         int mss_seen = 0;
1930     
1931         ptr = (unsigned char *)(th + 1);
1932   
1933         while(length>0)
1934         {
1935                 int opcode=*ptr++;
1936                 int opsize=*ptr++;
1937                 switch(opcode)
1938                 {
1939                         case TCPOPT_EOL:
1940                                 return;
1941                         case TCPOPT_NOP:
1942                                 length-=2;
1943                                 continue;
1944                         
1945                         default:
1946                                 if(opsize<=2)   /* Avoid silly options looping forever */
1947                                         return;
1948                                 switch(opcode)
1949                                 {
1950                                         case TCPOPT_MSS:
1951                                                 if(opsize==4 && th->syn)
1952                                                 {
1953                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1954                                                         mss_seen = 1;
1955                                                 }
1956                                                 break;
1957                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1958                                 }
1959                                 ptr+=opsize-2;
1960                                 length-=opsize;
1961                 }
1962         }
1963         if (th->syn) 
1964         {
1965                 if (! mss_seen)
1966                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1967         }
1968 #ifdef CONFIG_INET_PCTCP
1969         sk->mss = min(sk->max_window >> 1, sk->mtu);
1970 #else    
1971         sk->mss = min(sk->max_window, sk->mtu);
1972 #endif  
1973 }
1974 
/*
 *      Classful network mask for an address.  'dst' and the result are
 *      both in network byte order.  Class A and class B addresses get
 *      their own masks; everything else is given the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
        unsigned long host_order = ntohl(dst);
        unsigned long net_bits;

        if (IN_CLASSA(host_order))
                net_bits = IN_CLASSA_NET;
        else if (IN_CLASSB(host_order))
                net_bits = IN_CLASSB_NET;
        else
                net_bits = IN_CLASSC_NET;

        return htonl(net_bits);
}
1984 
1985 /*
1986  *      Default sequence number picking algorithm.
1987  */
1988 
1989 extern inline long tcp_init_seq(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1990 {
1991         return jiffies * SEQ_TICK - seq_offset; 
1992 }
1993 
1994 /*
1995  *      This routine handles a connection request.
1996  *      It should make sure we haven't already responded.
1997  *      Because of the way BSD works, we have to send a syn/ack now.
1998  *      This also means it will be harder to close a socket which is
1999  *      listening.
2000  */
2001  
2002 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
2003                  unsigned long daddr, unsigned long saddr,
2004                  struct options *opt, struct device *dev, unsigned long seq)
2005 {
2006         struct sk_buff *buff;
2007         struct tcphdr *t1;
2008         unsigned char *ptr;
2009         struct sock *newsk;
2010         struct tcphdr *th;
2011         struct device *ndev=NULL;
2012         int tmp;
2013         struct rtable *rt;
2014   
2015         th = skb->h.th;
2016 
2017         /* If the socket is dead, don't accept the connection. */
2018         if (!sk->dead) 
2019         {
2020                 sk->data_ready(sk,0);
2021         }
2022         else 
2023         {
2024                 if(sk->debug)
2025                         printk("Reset on %p: Connect on dead socket.\n",sk);
2026                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
2027                 tcp_statistics.TcpAttemptFails++;
2028                 kfree_skb(skb, FREE_READ);
2029                 return;
2030         }
2031 
2032         /*
2033          * Make sure we can accept more.  This will prevent a
2034          * flurry of syns from eating up all our memory.
2035          */
2036 
2037         if (sk->ack_backlog >= sk->max_ack_backlog) 
2038         {
2039                 tcp_statistics.TcpAttemptFails++;
2040                 kfree_skb(skb, FREE_READ);
2041                 return;
2042         }
2043 
2044         /*
2045          * We need to build a new sock struct.
2046          * It is sort of bad to have a socket without an inode attached
2047          * to it, but the wake_up's will just wake up the listening socket,
2048          * and if the listening socket is destroyed before this is taken
2049          * off of the queue, this will take care of it.
2050          */
2051 
2052         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
2053         if (newsk == NULL) 
2054         {
2055                 /* just ignore the syn.  It will get retransmitted. */
2056                 tcp_statistics.TcpAttemptFails++;
2057                 kfree_skb(skb, FREE_READ);
2058                 return;
2059         }
2060 
2061         memcpy(newsk, sk, sizeof(*newsk));
2062         skb_queue_head_init(&newsk->write_queue);
2063         skb_queue_head_init(&newsk->receive_queue);
2064         newsk->send_head = NULL;
2065         newsk->send_tail = NULL;
2066         skb_queue_head_init(&newsk->back_log);
2067         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2068         newsk->rto = TCP_TIMEOUT_INIT;
2069         newsk->mdev = 0;
2070         newsk->max_window = 0;
2071         newsk->cong_window = 1;
2072         newsk->cong_count = 0;
2073         newsk->ssthresh = 0;
2074         newsk->backoff = 0;
2075         newsk->blog = 0;
2076         newsk->intr = 0;
2077         newsk->proc = 0;
2078         newsk->done = 0;
2079         newsk->partial = NULL;
2080         newsk->pair = NULL;
2081         newsk->wmem_alloc = 0;
2082         newsk->rmem_alloc = 0;
2083         newsk->localroute = sk->localroute;
2084 
2085         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2086 
2087         newsk->err = 0;
2088         newsk->shutdown = 0;
2089         newsk->ack_backlog = 0;
2090         newsk->acked_seq = skb->h.th->seq+1;
2091         newsk->fin_seq = skb->h.th->seq;
2092         newsk->copied_seq = skb->h.th->seq;
2093         newsk->state = TCP_SYN_RECV;
2094         newsk->timeout = 0;
2095         newsk->write_seq = seq; 
2096         newsk->window_seq = newsk->write_seq;
2097         newsk->rcv_ack_seq = newsk->write_seq;
2098         newsk->urg_data = 0;
2099         newsk->retransmits = 0;
2100         newsk->destroy = 0;
2101         init_timer(&newsk->timer);
2102         newsk->timer.data = (unsigned long)newsk;
2103         newsk->timer.function = &net_timer;
2104         newsk->dummy_th.source = skb->h.th->dest;
2105         newsk->dummy_th.dest = skb->h.th->source;
2106         
2107         /*
2108          *      Swap these two, they are from our point of view. 
2109          */
2110          
2111         newsk->daddr = saddr;
2112         newsk->saddr = daddr;
2113 
2114         put_sock(newsk->num,newsk);
2115         newsk->dummy_th.res1 = 0;
2116         newsk->dummy_th.doff = 6;
2117         newsk->dummy_th.fin = 0;
2118         newsk->dummy_th.syn = 0;
2119         newsk->dummy_th.rst = 0;        
2120         newsk->dummy_th.psh = 0;
2121         newsk->dummy_th.ack = 0;
2122         newsk->dummy_th.urg = 0;
2123         newsk->dummy_th.res2 = 0;
2124         newsk->acked_seq = skb->h.th->seq + 1;
2125         newsk->copied_seq = skb->h.th->seq;
2126         newsk->socket = NULL;
2127 
2128         /*
2129          *      Grab the ttl and tos values and use them 
2130          */
2131 
2132         newsk->ip_ttl=sk->ip_ttl;
2133         newsk->ip_tos=skb->ip_hdr->tos;
2134 
2135         /*
2136          *      Use 512 or whatever user asked for 
2137          */
2138 
2139         /*
2140          *      Note use of sk->user_mss, since user has no direct access to newsk 
2141          */
2142 
2143         rt=ip_rt_route(saddr, NULL,NULL);
2144         
2145         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2146                 newsk->window_clamp = rt->rt_window;
2147         else
2148                 newsk->window_clamp = 0;
2149                 
2150         if (sk->user_mss)
2151                 newsk->mtu = sk->user_mss;
2152         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2153                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2154         else 
2155         {
2156 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2157                 if ((saddr ^ daddr) & default_mask(saddr))
2158 #else
2159                 if ((saddr ^ daddr) & dev->pa_mask)
2160 #endif
2161                         newsk->mtu = 576 - HEADER_SIZE;
2162                 else
2163                         newsk->mtu = MAX_WINDOW;
2164         }
2165 
2166         /*
2167          *      But not bigger than device MTU 
2168          */
2169 
2170         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2171 
2172         /*
2173          *      This will min with what arrived in the packet 
2174          */
2175 
2176         tcp_options(newsk,skb->h.th);
2177 
2178         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2179         if (buff == NULL) 
2180         {
2181                 sk->err = -ENOMEM;
2182                 newsk->dead = 1;
2183                 release_sock(newsk);
2184                 kfree_skb(skb, FREE_READ);
2185                 tcp_statistics.TcpAttemptFails++;
2186                 return;
2187         }
2188   
2189         buff->len = sizeof(struct tcphdr)+4;
2190         buff->sk = newsk;
2191         buff->localroute = newsk->localroute;
2192 
2193         t1 =(struct tcphdr *) buff->data;
2194 
2195         /*
2196          *      Put in the IP header and routing stuff. 
2197          */
2198 
2199         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2200                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2201 
2202         /*
2203          *      Something went wrong. 
2204          */
2205 
2206         if (tmp < 0) 
2207         {
2208                 sk->err = tmp;
2209                 buff->free=1;
2210                 kfree_skb(buff,FREE_WRITE);
2211                 newsk->dead = 1;
2212                 release_sock(newsk);
2213                 skb->sk = sk;
2214                 kfree_skb(skb, FREE_READ);
2215                 tcp_statistics.TcpAttemptFails++;
2216                 return;
2217         }
2218 
2219         buff->len += tmp;
2220         t1 =(struct tcphdr *)((char *)t1 +tmp);
2221   
2222         memcpy(t1, skb->h.th, sizeof(*t1));
2223         buff->h.seq = newsk->write_seq;
2224         /*
2225          *      Swap the send and the receive. 
2226          */
2227         t1->dest = skb->h.th->source;
2228         t1->source = newsk->dummy_th.source;
2229         t1->seq = ntohl(newsk->write_seq++);
2230         t1->ack = 1;
2231         newsk->window = tcp_select_window(newsk);
2232         newsk->sent_seq = newsk->write_seq;
2233         t1->window = ntohs(newsk->window);
2234         t1->res1 = 0;
2235         t1->res2 = 0;
2236         t1->rst = 0;
2237         t1->urg = 0;
2238         t1->psh = 0;
2239         t1->syn = 1;
2240         t1->ack_seq = ntohl(skb->h.th->seq+1);
2241         t1->doff = sizeof(*t1)/4+1;
2242         ptr =(unsigned char *)(t1+1);
2243         ptr[0] = 2;
2244         ptr[1] = 4;
2245         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2246         ptr[3] =(newsk->mtu) & 0xff;
2247 
2248         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2249         newsk->prot->queue_xmit(newsk, ndev, buff, 0);
2250 
2251         reset_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
2252         skb->sk = newsk;
2253 
2254         /*
2255          *      Charge the sock_buff to newsk. 
2256          */
2257          
2258         sk->rmem_alloc -= skb->mem_len;
2259         newsk->rmem_alloc += skb->mem_len;
2260         
2261         skb_queue_tail(&sk->receive_queue,skb);
2262         sk->ack_backlog++;
2263         release_sock(newsk);
2264         tcp_statistics.TcpOutSegs++;
2265 }
2266 
2267 
2268 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2269 {
2270         struct sk_buff *buff;
2271         struct tcphdr *t1, *th;
2272         struct proto *prot;
2273         struct device *dev=NULL;
2274         int tmp;
2275 
2276         /*
2277          * We need to grab some memory, and put together a FIN, 
2278          * and then put it into the queue to be sent.
2279          */
2280         sk->inuse = 1;
2281         sk->keepopen = 1;
2282         sk->shutdown = SHUTDOWN_MASK;
2283 
2284         if (!sk->dead) 
2285                 sk->state_change(sk);
2286 
2287         if (timeout == 0) 
2288         {
2289                 /*
2290                  *  We need to flush the recv. buffs.  We do this only on the
2291                  *  descriptor close, not protocol-sourced closes, because the
2292                  *  reader process may not have drained the data yet!
2293                  */
2294 
2295                 if (skb_peek(&sk->receive_queue) != NULL) 
2296                 {
2297                         struct sk_buff *skb;
2298                         if(sk->debug)
2299                                 printk("Clean rcv queue\n");
2300                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2301                                 kfree_skb(skb, FREE_READ);
2302                         if(sk->debug)
2303                                 printk("Cleaned.\n");
2304                 }
2305         }
2306 
2307         /*
2308          *      Get rid off any half-completed packets. 
2309          */
2310          
2311         if (sk->partial) 
2312         {
2313                 tcp_send_partial(sk);
2314         }
2315 
2316         switch(sk->state) 
2317         {
2318                 case TCP_FIN_WAIT1:
2319                 case TCP_FIN_WAIT2:
2320                 case TCP_CLOSING:
2321                         /*
2322                          * These states occur when we have already closed out
2323                          * our end.  If there is no timeout, we do not do
2324                          * anything.  We may still be in the middle of sending
2325                          * the remainder of our buffer, for example...
2326                          * resetting the timer would be inappropriate.
2327                          *
2328                          * XXX if retransmit count reaches limit, is tcp_close()
2329                          * called with timeout == 1 ? if not, we need to fix that.
2330                          */
2331                         if (!timeout) {
2332                                 int timer_active;
2333 
2334                                 timer_active = del_timer(&sk->timer);
2335                                 if (timer_active)
2336                                         add_timer(&sk->timer);
2337                                 else
2338                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2339                         }
2340                         if (timeout) 
2341                                 tcp_time_wait(sk);
2342                         release_sock(sk);
2343                         return; /* break causes a double release - messy */
2344                 case TCP_TIME_WAIT:
2345                 case TCP_LAST_ACK:
2346                         /*
2347                          * A timeout from these states terminates the TCB.
2348                          */
2349                         if (timeout) 
2350                         {
2351                                 tcp_set_state(sk,TCP_CLOSE);
2352                         }
2353                         release_sock(sk);
2354                         return;
2355                 case TCP_LISTEN:
2356                         /* we need to drop any sockets which have been connected,
2357                            but have not yet been accepted. */
2358                         tcp_close_pending(sk, timeout);
2359                         tcp_set_state(sk,TCP_CLOSE);
2360                         release_sock(sk);
2361                         return;
2362                 case TCP_CLOSE:
2363                         release_sock(sk);
2364                         return;
2365                 case TCP_CLOSE_WAIT:
2366                 case TCP_ESTABLISHED:
2367                 case TCP_SYN_SENT:
2368                 case TCP_SYN_RECV:
2369                         prot =(struct proto *)sk->prot;
2370                         th =(struct tcphdr *)&sk->dummy_th;
2371                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2372                         if (buff == NULL) 
2373                         {
2374                                 /* This will force it to try again later. */
2375                                 /* Or it would have if someone released the socket
2376                                    first. Anyway it might work now */
2377                                 release_sock(sk);
2378                                 if (sk->state != TCP_CLOSE_WAIT)
2379                                         tcp_set_state(sk,TCP_ESTABLISHED);
2380                                 reset_timer(sk, TIME_CLOSE, 100);
2381                                 return;
2382                         }
2383                         buff->sk = sk;
2384                         buff->free = 1;
2385                         buff->len = sizeof(*t1);
2386                         buff->localroute = sk->localroute;
2387                         t1 =(struct tcphdr *) buff->data;
2388         
2389                         /*
2390                          *      Put in the IP header and routing stuff. 
2391                          */
2392                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2393                                          IPPROTO_TCP, sk->opt,
2394                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2395                         if (tmp < 0) 
2396                         {
2397                                 sk->write_seq++;        /* Very important 8) */
2398                                 kfree_skb(buff,FREE_WRITE);
2399 
2400                                 /*
2401                                  * Enter FIN_WAIT1 to await completion of
2402                                  * written out data and ACK to our FIN.
2403                                  */
2404 
2405                                 if(sk->state==TCP_ESTABLISHED)
2406                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2407                                 else
2408                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2409                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2410                                 if(timeout)
2411                                         tcp_time_wait(sk);
2412 
2413                                 release_sock(sk);
2414                                 return;
2415                         }
2416 
2417                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2418                         buff->len += tmp;
2419                         buff->dev = dev;
2420                         memcpy(t1, th, sizeof(*t1));
2421                         t1->seq = ntohl(sk->write_seq);
2422                         sk->write_seq++;
2423                         buff->h.seq = sk->write_seq;
2424                         t1->ack = 1;
2425         
2426                         /* 
2427                          *      Ack everything immediately from now on. 
2428                          */
2429 
2430                         sk->delay_acks = 0;
2431                         t1->ack_seq = ntohl(sk->acked_seq);
2432                         t1->window = ntohs(sk->window=tcp_select_window(sk));
2433                         t1->fin = 1;
2434                         t1->rst = 0;
2435                         t1->doff = sizeof(*t1)/4;
2436                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2437 
2438                         tcp_statistics.TcpOutSegs++;
2439         
2440                         if (skb_peek(&sk->write_queue) == NULL) 
2441                         {
2442                                 sk->sent_seq = sk->write_seq;
2443                                 prot->queue_xmit(sk, dev, buff, 0);
2444                         } 
2445                         else 
2446                         {
2447                                 reset_timer(sk, TIME_WRITE, sk->rto);
2448                                 if (buff->next != NULL) 
2449                                 {
2450                                         printk("tcp_close: next != NULL\n");
2451                                         skb_unlink(buff);
2452                                 }
2453                                 skb_queue_tail(&sk->write_queue, buff);
2454                         }
2455 
2456                         /*
2457                          * If established (normal close), enter FIN_WAIT1.
2458                          * If in CLOSE_WAIT, enter LAST_ACK
2459                          * If in CLOSING, remain in CLOSING
2460                          * otherwise enter FIN_WAIT2
2461                          */
2462 
2463                         if (sk->state == TCP_ESTABLISHED)
2464                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2465                         else if (sk->state == TCP_CLOSE_WAIT)
2466                                 tcp_set_state(sk,TCP_LAST_ACK);
2467                         else if (sk->state != TCP_CLOSING)
2468                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2469         }
2470         release_sock(sk);
2471 }
2472 
2473 
2474 /*
2475  * Drain sk->write_queue into the transmit path for as long as the
2476  * peer's window and our congestion window leave room for the head segment.
2477  */
2478 static void
2479 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2480 {
2481         struct sk_buff *pkt;
2482         struct tcphdr *tcph;
2483         struct iphdr *ip;
2484         int tcplen;
2485 
2486         /* A zapped socket sends nothing; its queued data just sits here. */
2487         if (sk->zapped)
2488                 return;
2489 
2490         for (;;)
2491         {
2492                 pkt = skb_peek(&sk->write_queue);
2493                 if (pkt == NULL)
2494                         break;
2495                 /* The head segment must not reach past sk->window_seq. */
2496                 if (!before(pkt->h.seq, sk->window_seq + 1))
2497                         break;
2498                 /*
2499                  * With retransmits pending under a TIME_WRITE timer, only
2500                  * segments the peer has already acked may leave the queue.
2501                  */
2502                 if (sk->retransmits != 0 && sk->timeout == TIME_WRITE &&
2503                     !before(pkt->h.seq, sk->rcv_ack_seq + 1))
2504                         break;
2505                 if (!(sk->packets_out < sk->cong_window))
2506                         break;
2507 
2508                 IS_SKB(pkt);
2509                 skb_unlink(pkt);
2510 
2511                 if (before(pkt->h.seq, sk->rcv_ack_seq + 1))
2512                 {
2513                         /* Already acknowledged: drop it instead of sending. */
2514                         sk->retransmits = 0;
2515                         kfree_skb(pkt, FREE_WRITE);
2516                         if (!sk->dead)
2517                                 sk->write_space(sk);
2518                         continue;
2519                 }
2520 
2521                 /*
2522                  * Fill in ack_seq and window only now, so the values on the
2523                  * wire stay monotonic even if the segment sat queued a while.
2524                  */
2525                 ip = (struct iphdr *)(pkt->data + pkt->dev->hard_header_len);
2526                 tcph = (struct tcphdr *)((char *)ip + (ip->ihl << 2));
2527                 tcplen = pkt->len - ((unsigned char *)tcph - pkt->data);
2528                 tcph->ack_seq = ntohl(sk->acked_seq);
2529                 tcph->window = ntohs(tcp_select_window(sk));
2530                 tcp_send_check(tcph, sk->saddr, sk->daddr, tcplen, sk);
2531                 sk->sent_seq = pkt->h.seq;
2532                 sk->prot->queue_xmit(sk, pkt->dev, pkt, pkt->free);
2533         }
2534 }
2535 
2536 
2537 /*
2538  *      This routine deals with incoming acks, but not outgoing ones.
      *
      *      sk    - socket whose send-side state is updated.
      *      th    - TCP header of the received segment; ack_seq, window and
      *              doff are read from it.
      *      saddr - not referenced anywhere in this function.
      *      len   - compared against th->doff*4; a different value is taken
      *              to mean the segment carries more than a bare header.
      *
      *      Returns 1 immediately for a zapped socket.  Returns 0 when the
      *      ack is beyond sk->sent_seq, or is older than sk->rcv_ack_seq while
      *      the socket is in neither ESTABLISHED nor CLOSE_WAIT.  Returns 1
      *      in every other case.
2539  */
2540 
2541 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2542 {
2543         unsigned long ack;
2544         int flag = 0;
2545 
2546         /* 
              * Meaning of the bits collected in flag:
2547          * 1 - there was data in packet as well as ack or new data is sent or 
2548          *     in shutdown state
2549          * 2 - data from retransmit queue was acked and removed
2550          * 4 - window shrunk or data from retransmit queue was acked and removed
2551          */
2552 
2553         if(sk->zapped)
2554                 return(1);      /* Dead, can't ack any more so why bother */
2555 
2556         ack = ntohl(th->ack_seq);
             /*
              * Remember the largest window the peer has ever offered and
              * re-derive mss from it (half of it under CONFIG_INET_PCTCP).
              */
2557         if (ntohs(th->window) > sk->max_window) 
2558         {
2559                 sk->max_window = ntohs(th->window);
2560 #ifdef CONFIG_INET_PCTCP
2561                 sk->mss = min(sk->max_window>>1, sk->mtu);
2562 #else
2563                 sk->mss = min(sk->max_window, sk->mtu);
2564 #endif  
2565         }
2566 
             /* An ack arriving while the keepalive timer is pending clears the retransmit count. */
2567         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2568                 sk->retransmits = 0;
2569 
             /*
              * The ack lies outside [rcv_ack_seq, sent_seq]: it either covers
              * data we have not sent or is older than one already processed.
              */
2570         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2571         {
2572                 if(sk->debug)
2573                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2574                         
2575                 /*
2576                  *      Keepalive processing.
                      *      An old ack is only tolerated (return 1) in ESTABLISHED or
                      *      CLOSE_WAIT; there it re-arms a pending keepalive timer.
2577                  */
2578                  
2579                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2580                 {
2581                         return(0);
2582                 }
2583                 if (sk->keepopen) 
2584                 {
2585                         if(sk->timeout==TIME_KEEPOPEN)
2586                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2587                 }
2588                 return(1);
2589         }
2590 
             /* len differs from the header length: note it in flag bit 1. */
2591         if (len != th->doff*4) 
2592                 flag |= 1;
2593 
2594         /* See if our window has been shrunk. */
2595 
2596         if (after(sk->window_seq, ack+ntohs(th->window))) 
2597         {
2598                 /*
2599                  * We may need to move packets from the send queue
2600                  * to the write queue, if the window has been shrunk on us.
2601                  * The RFC says you are not allowed to shrink your window
2602                  * like this, but if the other end does, you must be able
2603                  * to deal with it.
2604                  */
2605                 struct sk_buff *skb;
2606                 struct sk_buff *skb2;
2607                 struct sk_buff *wskb = NULL;
2608         
2609                 skb2 = sk->send_head;
2610                 sk->send_head = NULL;
2611                 sk->send_tail = NULL;
2612         
2613                 flag |= 4;
2614         
2615                 sk->window_seq = ack + ntohs(th->window);
                     /*
                      * Walk the detached retransmit list (chained through link3)
                      * with interrupts off.  Segments past the new window edge go
                      * back to the front of the write queue, keeping their order;
                      * everything else is re-chained onto send_head/send_tail.
                      */
2616                 cli();
2617                 while (skb2 != NULL) 
2618                 {
2619                         skb = skb2;
2620                         skb2 = skb->link3;
2621                         skb->link3 = NULL;
2622                         if (after(skb->h.seq, sk->window_seq)) 
2623                         {
2624                                 if (sk->packets_out > 0) 
2625                                         sk->packets_out--;
2626                                 /* We may need to remove this from the dev send list. */
2627                                 if (skb->next != NULL) 
2628                                 {
2629                                         skb_unlink(skb);                                
2630                                 }
2631                                 /* Now add it to the write_queue. */
2632                                 if (wskb == NULL)
2633                                         skb_queue_head(&sk->write_queue,skb);
2634                                 else
2635                                         skb_append(wskb,skb);
2636                                 wskb = skb;
2637                         } 
2638                         else 
2639                         {
2640                                 if (sk->send_head == NULL) 
2641                                 {
2642                                         sk->send_head = skb;
2643                                         sk->send_tail = skb;
2644                                 }
2645                                 else
2646                                 {
2647                                         sk->send_tail->link3 = skb;
2648                                         sk->send_tail = skb;
2649                                 }
2650                                 skb->link3 = NULL;
2651                         }
2652                 }
2653                 sti();
2654         }
2655 
             /* If either end of the retransmit list is NULL, treat it as empty: clear both ends and the in-flight count. */
2656         if (sk->send_tail == NULL || sk->send_head == NULL) 
2657         {
2658                 sk->send_head = NULL;
2659                 sk->send_tail = NULL;
2660                 sk->packets_out= 0;
2661         }
2662 
2663         sk->window_seq = ack + ntohs(th->window);
2664 
2665         /* We don't want too many packets out there. */
2666         if (sk->timeout == TIME_WRITE && 
2667                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2668         {
2669 /* 
2670  * This is Jacobson's slow start and congestion avoidance. 
2671  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2672  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2673  * counter and increment it once every cwnd times.  It's possible
2674  * that this should be done only if sk->retransmits == 0.  I'm
2675  * interpreting "new data is acked" as including data that has
2676  * been retransmitted but is just now being acked.
2677  */
2678                 if (sk->cong_window < sk->ssthresh)  
2679                   /* 
2680                    *    In "safe" area, increase
2681                    */
2682                         sk->cong_window++;
2683                 else 
2684                 {
2685                   /*
2686                    *    In dangerous area, increase slowly.  In theory this is
2687                    *    sk->cong_window += 1 / sk->cong_window
2688                    */
2689                         if (sk->cong_count >= sk->cong_window) 
2690                         {
2691                                 sk->cong_window++;
2692                                 sk->cong_count = 0;
2693                         }
2694                         else 
2695                                 sk->cong_count++;
2696                 }
2697         }
2698 
2699         sk->rcv_ack_seq = ack;
2700 
2701         /*
2702          * if this ack opens up a zero window, clear backoff.  It was
2703          * being used to time the probes, and is probably far higher than
2704          * it needs to be for normal retransmission.
2705          */
2706 
2707         if (sk->timeout == TIME_PROBE0) 
2708         {
2709                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2710                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2711                 {
2712                         sk->retransmits = 0;
2713                         sk->backoff = 0;
2714                   /*
2715                    *    Recompute rto from rtt.  this eliminates any backoff.
                        *    The result is clamped to the range [20, 120*HZ].
2716                    */
2717 
2718                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2719                         if (sk->rto > 120*HZ)
2720                                 sk->rto = 120*HZ;
2721                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2722                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2723                                                    .2 of a second is going to need huge windows (SIGH) */
2724                                 sk->rto = 20;
2725                 }
2726         }
2727 
2728   /* 
2729    *    See if we can take anything off of the retransmit queue.
        *    Every segment at the head whose h.seq is not after ack is
        *    unlinked and freed; the loop stops at the first one that is.
2730    */
2731    
2732         while(sk->send_head != NULL) 
2733         {
2734                 /* Check for a bug. */
2735                 if (sk->send_head->link3 &&
2736                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2737                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2738                 if (before(sk->send_head->h.seq, ack+1)) 
2739                 {
2740                         struct sk_buff *oskb;   
2741                         if (sk->retransmits) 
2742                         {       
2743                                 /*
2744                                  *      We were retransmitting.  don't count this in RTT est 
2745                                  */
2746                                 flag |= 2;
2747 
2748                                 /*
2749                                  * even though we've gotten an ack, we're still
2750                                  * retransmitting as long as we're sending from
2751                                  * the retransmit queue.  Keeping retransmits non-zero
2752                                  * prevents us from getting new data interspersed with
2753                                  * retransmissions.
2754                                  */
2755 
2756                                 if (sk->send_head->link3)
2757                                         sk->retransmits = 1;
2758                                 else
2759                                         sk->retransmits = 0;
2760                         }
2761                         /*
2762                          * Note that we only reset backoff and rto in the
2763                          * rtt recomputation code.  And that doesn't happen
2764                          * if there were retransmissions in effect.  So the
2765                          * first new packet after the retransmissions is
2766                          * sent with the backoff still in effect.  Not until
2767                          * we get an ack from a non-retransmitted packet do
2768                          * we reset the backoff and rto.  This allows us to deal
2769                          * with a situation where the network delay has increased
2770                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2771                          */
2772 
2773                         /*
2774                          *      We have one less packet out there. 
2775                          */
2776                          
2777                         if (sk->packets_out > 0) 
2778                                 sk->packets_out --;
2779                         /* 
2780                          *      Wake up the process, it can probably write more. 
2781                          */
2782                         if (!sk->dead) 
2783                                 sk->write_space(sk);
2784                         oskb = sk->send_head;
2785 
                             /*
                              * flag bit 2 is set once any segment has been removed in
                              * this call (or while retransmitting), so at most the
                              * first removed segment feeds the RTT estimator.
                              */
2786                         if (!(flag&2)) 
2787                         {
2788                                 long m;
2789         
2790                                 /*
2791                                  *      The following amusing code comes from Jacobson's
2792                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2793                                  *      are scaled versions of rtt and mean deviation.
2794                                  *      This is designed to be as fast as possible 
2795                                  *      m stands for "measurement".
2796                                  */
2797         
2798                                 m = jiffies - oskb->when;  /* RTT */
2799                                 if(m<=0)
2800                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2801                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2802                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2803                                 if (m < 0)
2804                                         m = -m;         /* m is now abs(error) */
2805                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2806                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2807         
2808                                 /*
2809                                  *      Now update timeout.  Note that this removes any backoff.
2810                                  */
2811                          
2812                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2813                                 if (sk->rto > 120*HZ)
2814                                         sk->rto = 120*HZ;
2815                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2816                                         sk->rto = 20;
2817                                 sk->backoff = 0;
2818                         }
2819                         flag |= (2|4);
                             /* Unlink the head of the retransmit list with interrupts off. */
2820                         cli();
2821                         oskb = sk->send_head;
2822                         IS_SKB(oskb);
2823                         sk->send_head = oskb->link3;
2824                         if (sk->send_head == NULL) 
2825                         {
2826                                 sk->send_tail = NULL;
2827                         }
2828 
2829                 /*
2830                  *      We may need to remove this from the dev send list. 
2831                  */
2832 
2833                         if (oskb->next)
2834                                 skb_unlink(oskb);
2835                         sti();
2836                         kfree_skb(oskb, FREE_WRITE); /* write. */
2837                         if (!sk->dead) 
2838                                 sk->write_space(sk);
2839                 }
2840                 else
2841                 {
2842                         break;
2843                 }
2844         }
2845 
2846         /*
2847          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2848          * returns non-NULL, we completely ignore the timer stuff in the else
2849          * clause.  We ought to organize the code so that else clause can
2850          * (should) be executed regardless, possibly moving the PROBE timer
2851          * reset over.  The skb_peek() thing should only move stuff to the
2852          * write queue, NOT also manage the timer functions.
2853          */
2854 
2855         /*
2856          * Maybe we can take some stuff off of the write queue,
2857          * and put it onto the xmit queue.
              *
              * The first test below repeats the loop condition of
              * tcp_write_xmit() for the head of the write queue.  If instead
              * the head lies beyond the window and nothing is awaiting an ack,
              * the zero-window probe timer is armed.
2858          */
2859         if (skb_peek(&sk->write_queue) != NULL) 
2860         {
2861                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2862                         (sk->retransmits == 0 || 
2863                          sk->timeout != TIME_WRITE ||
2864                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2865                         && sk->packets_out < sk->cong_window) 
2866                 {
2867                         flag |= 1;
2868                         tcp_write_xmit(sk);
2869                 }
2870                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2871                         sk->send_head == NULL &&
2872                         sk->ack_backlog == 0 &&
2873                         sk->state != TCP_TIME_WAIT) 
2874                 {
2875                         reset_timer(sk, TIME_PROBE0, sk->rto);
2876                 }               
2877         }
2878         else
2879         {
2880                 /*
2881                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2882                  * from TCP_CLOSE we don't do anything
2883                  *
2884                  * from anything else, if there is write data (or fin) pending,
2885                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2886                  * a KEEPALIVE timeout, else we delete the timer.
2887                  *
2888                  * We do not set flag for nominal write data, otherwise we may
2889                  * force a state where we start to write itsy bitsy tidbits
2890                  * of data.
2891                  */
2892 
2893                 switch(sk->state) {
2894                 case TCP_TIME_WAIT:
2895                         /*
2896                          * keep us in TIME_WAIT until we stop getting packets,
2897                          * reset the timeout.
2898                          */
2899                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2900                         break;
2901                 case TCP_CLOSE:
2902                         /*
2903                          * don't touch the timer.
2904                          */
2905                         break;
2906                 default:
2907                         /*
2908                          * must check send_head, write_queue, and ack_backlog
2909                          * to determine which timeout to use.
2910                          */
2911                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2912                                 reset_timer(sk, TIME_WRITE, sk->rto);
2913                         } else if (sk->keepopen) {
2914                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2915                         } else {
2916                                 delete_timer(sk);
2917                         }
2918                         break;
2919                 }
                     /* Earlier version of the timer logic; compiled only if NOTDEF is defined. */
2920 #ifdef NOTDEF
2921                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2922                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2923                 {
2924                         if (!sk->dead)
2925                                 sk->write_space(sk);
2926                         if (sk->keepopen) {
2927                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2928                         } else {
2929                                 delete_timer(sk);
2930                         }
2931                 }
2932                 else
2933                 {
2934                         if (sk->state != (unsigned char) sk->keepopen) 
2935                         {
2936                                 reset_timer(sk, TIME_WRITE, sk->rto);
2937                         }
2938                         if (sk->state == TCP_TIME_WAIT) 
2939                         {
2940                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2941                         }       
2942                 }
2943 #endif
2944         }
2945 
             /*
              * Nothing in flight and nothing queued: push out the partially
              * filled segment, if one is being held back.
              */
2946         if (sk->packets_out == 0 && sk->partial != NULL &&
2947                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2948         {
2949                 flag |= 1;
2950                 tcp_send_partial(sk);
2951         }
2952 
2953         /*
2954          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2955          * we are now waiting for an acknowledge to our FIN.  The other end is
2956          * already in TIME_WAIT.
2957          *
2958          * Move to TCP_CLOSE on success.
              *
              * NOTE(review): the code below calls tcp_time_wait() instead of
              * setting TCP_CLOSE directly, so it does not match the sentence
              * above.  RFC 793 has LAST-ACK go straight to CLOSED; confirm
              * which behaviour is intended before changing either.
2959          */
2960 
2961         if (sk->state == TCP_LAST_ACK) 
2962         {
2963                 if (!sk->dead)
2964                         sk->state_change(sk);
2965                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2966                 {
2967                         flag |= 1;
2968                         tcp_time_wait(sk);
2969                         sk->shutdown = SHUTDOWN_MASK;
2970                 }
2971         }
2972 
2973         /*
2974          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2975          *
2976          * Move to FIN_WAIT2 to await a FIN from the other end. Set
2977          * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
2978          */
2979 
2980         if (sk->state == TCP_FIN_WAIT1) 
2981         {
2982 
2983                 if (!sk->dead) 
2984                         sk->state_change(sk);
2985                 if (sk->rcv_ack_seq == sk->write_seq) 
2986                 {
2987                         flag |= 1;
2988                         sk->shutdown |= SEND_SHUTDOWN;
2989                         tcp_set_state(sk, TCP_FIN_WAIT2);
2990                 }
2991         }
2992 
2993         /*
2994          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2995          *
2996          *      Move to TIME_WAIT
2997          */
2998 
2999         if (sk->state == TCP_CLOSING) 
3000         {
3001 
3002                 if (!sk->dead) 
3003                         sk->state_change(sk);
3004                 if (sk->rcv_ack_seq == sk->write_seq) 
3005                 {
3006                         flag |= 1;
3007                         tcp_time_wait(sk);
3008                 }
3009         }
3010 
3011         /*
3012          * I make no guarantees about the first clause in the following
3013          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
3014          * what conditions "!flag" would be true.  However I think the rest
3015          * of the conditions would prevent that from causing any
3016          * unnecessary retransmission. 
3017          *   Clearly if the first packet has expired it should be 
3018          * retransmitted.  The other alternative, "flag&2 && retransmits", is
3019          * harder to explain:  You have to look carefully at how and when the
3020          * timer is set and with what timeout.  The most recent transmission always
3021          * sets the timer.  So in general if the most recent thing has timed
3022          * out, everything before it has as well.  So we want to go ahead and
3023          * retransmit some more.  If we didn't explicitly test for this
3024          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
3025          * would not be true.  If you look at the pattern of timing, you can
3026          * show that rto is increased fast enough that the next packet would
3027          * almost never be retransmitted immediately.  Then you'd end up
3028          * waiting for a timeout to send each packet on the retransmission
3029          * queue.  With my implementation of the Karn sampling algorithm,
3030          * the timeout would double each time.  The net result is that it would
3031          * take a hideous amount of time to recover from a single dropped packet.
3032          * It's possible that there should also be a test for TIME_WRITE, but
3033          * I think as long as "send_head != NULL" and "retransmit" is on, we've
3034          * got to be in real retransmission mode.
3035          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
3036          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
3037          * As long as no further losses occur, this seems reasonable.
3038          */
3039         
3040         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3041                (((flag&2) && sk->retransmits) ||
3042                (sk->send_head->when + sk->rto < jiffies))) 
3043         {
                     /*
                      * Head segment has really timed out: go through
                      * tcp_retransmit().  Otherwise resend straight away with
                      * ip_do_retransmit() and re-arm the TIME_WRITE timer here.
                      */
3044                 if(sk->send_head->when + sk->rto < jiffies)
3045                         tcp_retransmit(sk,0);   
3046                 else
3047                 {
3048                         ip_do_retransmit(sk, 1);
3049                         reset_timer(sk, TIME_WRITE, sk->rto);
3050                 }
3051         }
3052 
3053         return(1);
3054 }
3055 
3056 
3057 /*
3058  *      This routine handles the data.  If there is room in the buffer,
3059  *      it will be have already been moved into it.  If there is no
3060  *      room, then we will just have to discard the packet.
3061  */
3062 
3063 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
3064          unsigned long saddr, unsigned short len)
3065 {
3066         struct sk_buff *skb1, *skb2;
3067         struct tcphdr *th;
3068         int dup_dumped=0;
3069         unsigned long new_seq;
3070         struct sk_buff *tail;
3071         unsigned long shut_seq;
3072 
3073         th = skb->h.th;
3074         skb->len = len -(th->doff*4);
3075 
3076         /* The bytes in the receive read/assembly queue has increased. Needed for the
3077            low memory discard algorithm */
3078            
3079         sk->bytes_rcv += skb->len;
3080         
3081         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3082         {
3083                 /* 
3084                  *      Don't want to keep passing ack's back and forth. 
3085                  *      (someone sent us dataless, boring frame)
3086                  */
3087                 if (!th->ack)
3088                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3089                 kfree_skb(skb, FREE_READ);
3090                 return(0);
3091         }
3092         
3093         /*
3094          *      We no longer have anyone receiving data on this connection.
3095          */
3096 
3097         if(sk->shutdown & RCV_SHUTDOWN)
3098         {
3099                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3100                 
3101                 /*
3102                  *      This is subtle and not nice. When we shut down we can
3103                  *      have data in the queue and acked_seq therefore not
3104                  *      pointing to the last byte that will be read. Thus
3105                  *      the naive implementation:
3106                  *              after(new_seq,sk->acked_seq+1)
3107                  *      will cause bogus resets IFF a resend of a frame that has
3108                  *      been queued but not yet read after a shutdown has been done.
3109                  *      What we do now is a bit more complex but works as
3110                  *      follows. If the queue is empty copied_seq+1 is right (+1 for FIN)
3111                  *      if the queue has data the shutdown occurs at the right edge of
3112                  *      the last packet queued +1
3113                  *
3114                  *      We can't simply ack data beyond this point as it has
3115                  *      and will never be received by an application.
3116                  */
3117                 tail=skb_peek(&sk->receive_queue);
3118                 if(tail!=NULL)
3119                 {
3120                         tail=sk->receive_queue.prev;
3121                         shut_seq=tail->h.th->seq+tail->len+1;
3122                 }
3123                 else
3124                         shut_seq=sk->copied_seq+1;
3125                 
3126                 if(after(new_seq,shut_seq))
3127                 {
3128                         sk->acked_seq = new_seq + th->fin;
3129                         if(sk->debug)
3130                                 printk("Data arrived on %p after close [Data right edge %lX, Socket shut on %lX] %d\n",
3131                                         sk, new_seq, shut_seq, sk->blog);
3132                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3133                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3134                         tcp_statistics.TcpEstabResets++;
3135                         tcp_set_state(sk,TCP_CLOSE);
3136                         sk->err = EPIPE;
3137                         sk->shutdown = SHUTDOWN_MASK;
3138                         kfree_skb(skb, FREE_READ);
3139                         if (!sk->dead)
3140                                 sk->state_change(sk);
3141                         return(0);
3142                 }
3143         }
3144         /*
3145          *      Now we have to walk the chain, and figure out where this one
3146          *      goes into it.  This is set up so that the last packet we received
3147          *      will be the first one we look at, that way if everything comes
3148          *      in order, there will be no performance loss, and if they come
3149          *      out of order we will be able to fit things in nicely.
3150          */
3151 
3152         /* 
3153          *      This should start at the last one, and then go around forwards.
3154          */
3155 
3156         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3157         {
3158                 skb_queue_head(&sk->receive_queue,skb);
3159                 skb1= NULL;
3160         } 
3161         else
3162         {
3163                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3164                 {
3165                         if(sk->debug)
3166                         {
3167                                 printk("skb1=%p :", skb1);
3168                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3169                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3170                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3171                                                 sk->acked_seq);
3172                         }
3173                         
3174                         /*
3175                          *      Optimisation: Duplicate frame or extension of previous frame from
3176                          *      same sequence point (lost ack case).
3177                          *      The frame contains duplicate data or replaces a previous frame
3178                          *      discard the previous frame (safe as sk->inuse is set) and put
3179                          *      the new one in its place.
3180                          */
3181                          
3182                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3183                         {
3184                                 skb_append(skb1,skb);
3185                                 skb_unlink(skb1);
3186                                 kfree_skb(skb1,FREE_READ);
3187                                 dup_dumped=1;
3188                                 skb1=NULL;
3189                                 break;
3190                         }
3191                         
3192                         /*
3193                          *      Found where it fits
3194                          */
3195                          
3196                         if (after(th->seq+1, skb1->h.th->seq))
3197                         {
3198                                 skb_append(skb1,skb);
3199                                 break;
3200                         }
3201                         
3202                         /*
3203                          *      See if we've hit the start. If so insert.
3204                          */
3205                         if (skb1 == skb_peek(&sk->receive_queue))
3206                         {
3207                                 skb_queue_head(&sk->receive_queue, skb);
3208                                 break;
3209                         }
3210                 }
3211         }
3212 
3213         /*
3214          *      Figure out what the ack value for this frame is
3215          */
3216          
3217         th->ack_seq = th->seq + skb->len;
3218         if (th->syn) 
3219                 th->ack_seq++;
3220         if (th->fin)
3221                 th->ack_seq++;
3222 
3223         if (before(sk->acked_seq, sk->copied_seq)) 
3224         {
3225                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3226                 sk->acked_seq = sk->copied_seq;
3227         }
3228 
3229         /*
3230          *      Now figure out if we can ack anything.
3231          */
3232 
3233         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3234         {
3235                 if (before(th->seq, sk->acked_seq+1)) 
3236                 {
3237                         int newwindow;
3238 
3239                         if (after(th->ack_seq, sk->acked_seq)) 
3240                         {
3241                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3242                                 if (newwindow < 0)
3243                                         newwindow = 0;  
3244                                 sk->window = newwindow;
3245                                 sk->acked_seq = th->ack_seq;
3246                         }
3247                         skb->acked = 1;
3248 
3249                         /* 
3250                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3251                          */
3252 
3253                         if (skb->h.th->fin) 
3254                         {
3255                                 if (!sk->dead) 
3256                                         sk->state_change(sk);
3257                                 sk->shutdown |= RCV_SHUTDOWN;
3258                         }
3259           
3260                         for(skb2 = skb->next;
3261                             skb2 != (struct sk_buff *)&sk->receive_queue;
3262                             skb2 = skb2->next) 
3263                         {
3264                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3265                                 {
3266                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3267                                         {
3268                                                 newwindow = sk->window -
3269                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3270                                                 if (newwindow < 0)
3271                                                         newwindow = 0;  
3272                                                 sk->window = newwindow;
3273                                                 sk->acked_seq = skb2->h.th->ack_seq;
3274                                         }
3275                                         skb2->acked = 1;
3276                                         /*
3277                                          *      When we ack the fin, we turn on
3278                                          *      the RCV_SHUTDOWN flag.
3279                                          */
3280                                         if (skb2->h.th->fin) 
3281                                         {
3282                                                 sk->shutdown |= RCV_SHUTDOWN;
3283                                                 if (!sk->dead)
3284                                                         sk->state_change(sk);
3285                                         }
3286 
3287                                         /*
3288                                          *      Force an immediate ack.
3289                                          */
3290                                          
3291                                         sk->ack_backlog = sk->max_ack_backlog;
3292                                 }
3293                                 else
3294                                 {
3295                                         break;
3296                                 }
3297                         }
3298 
3299                         /*
3300                          *      This also takes care of updating the window.
3301                          *      This if statement needs to be simplified.
3302                          */
3303                         if (!sk->delay_acks ||
3304                             sk->ack_backlog >= sk->max_ack_backlog || 
3305                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3306         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3307                         }
3308                         else 
3309                         {
3310                                 sk->ack_backlog++;
3311                                 if(sk->debug)
3312                                         printk("Ack queued.\n");
3313                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3314                         }
3315                 }
3316         }
3317 
3318         /*
3319          *      If we've missed a packet, send an ack.
3320          *      Also start a timer to send another.
3321          */
3322          
3323         if (!skb->acked) 
3324         {
3325         
3326         /*
3327          *      This is important.  If we don't have much room left,
3328          *      we need to throw out a few packets so we have a good
3329          *      window.  Note that mtu is used, not mss, because mss is really
3330          *      for the send side.  He could be sending us stuff as large as mtu.
3331          */
3332                  
3333                 while (sk->prot->rspace(sk) < sk->mtu) 
3334                 {
3335                         skb1 = skb_peek(&sk->receive_queue);
3336                         if (skb1 == NULL) 
3337                         {
3338                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3339                                 break;
3340                         }
3341 
3342                         /*
3343                          *      Don't throw out something that has been acked. 
3344                          */
3345                  
3346                         if (skb1->acked) 
3347                         {
3348                                 break;
3349                         }
3350                 
3351                         skb_unlink(skb1);
3352                         kfree_skb(skb1, FREE_READ);
3353                 }
3354                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3355                 sk->ack_backlog++;
3356                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3357         }
3358         else
3359         {
3360                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3361         }
3362 
3363         /*
3364          *      Now tell the user we may have some data. 
3365          */
3366          
3367         if (!sk->dead) 
3368         {
3369                 if(sk->debug)
3370                         printk("Data wakeup.\n");
3371                 sk->data_ready(sk,0);
3372         } 
3373         return(0);
3374 }
3375 
3376 
3377 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3378 {
3379         unsigned long ptr = ntohs(th->urg_ptr);
3380 
3381         if (ptr)
3382                 ptr--;
3383         ptr += th->seq;
3384 
3385         /* ignore urgent data that we've already seen and read */
3386         if (after(sk->copied_seq+1, ptr))
3387                 return;
3388 
3389         /* do we already have a newer (or duplicate) urgent pointer? */
3390         if (sk->urg_data && !after(ptr, sk->urg_seq))
3391                 return;
3392 
3393         /* tell the world about our new urgent pointer */
3394         if (sk->proc != 0) {
3395                 if (sk->proc > 0) {
3396                         kill_proc(sk->proc, SIGURG, 1);
3397                 } else {
3398                         kill_pg(-sk->proc, SIGURG, 1);
3399                 }
3400         }
3401         sk->urg_data = URG_NOTYET;
3402         sk->urg_seq = ptr;
3403 }
3404 
3405 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3406         unsigned long saddr, unsigned long len)
3407 {
3408         unsigned long ptr;
3409 
3410         /* check if we get a new urgent pointer */
3411         if (th->urg)
3412                 tcp_check_urg(sk,th);
3413 
3414         /* do we wait for any urgent data? */
3415         if (sk->urg_data != URG_NOTYET)
3416                 return 0;
3417 
3418         /* is the urgent pointer pointing into this packet? */
3419         ptr = sk->urg_seq - th->seq + th->doff*4;
3420         if (ptr >= len)
3421                 return 0;
3422 
3423         /* ok, got the correct packet, update info */
3424         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3425         if (!sk->dead)
3426                 sk->data_ready(sk,0);
3427         return 0;
3428 }
3429 
3430 
3431 /*
3432  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3433  *
3434  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3435  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3436  *  TIME-WAIT)
3437  *
3438  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3439  *  close and we go into CLOSING (and later onto TIME-WAIT)
3440  *
3441  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3442  *
3443  */
3444 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3445          unsigned long saddr, struct device *dev)
3446 {
3447         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3448 
3449         if (!sk->dead) 
3450         {
3451                 sk->state_change(sk);
3452         }
3453 
3454         switch(sk->state) 
3455         {
3456                 case TCP_SYN_RECV:
3457                 case TCP_SYN_SENT:
3458                 case TCP_ESTABLISHED:
3459                         /*
3460                          * move to CLOSE_WAIT, tcp_data() already handled
3461                          * sending the ack.
3462                          */
3463                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3464                         tcp_set_state(sk,TCP_CLOSE_WAIT);
3465                         if (th->rst)
3466                                 sk->shutdown = SHUTDOWN_MASK;
3467                         break;
3468 
3469                 case TCP_CLOSE_WAIT:
3470                 case TCP_CLOSING:
3471                         /*
3472                          * received a retransmission of the FIN, do
3473                          * nothing.
3474                          */
3475                         break;
3476                 case TCP_TIME_WAIT:
3477                         /*
3478                          * received a retransmission of the FIN,
3479                          * restart the TIME_WAIT timer.
3480                          */
3481                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3482                         return(0);
3483                 case TCP_FIN_WAIT1:
3484                         /*
3485                          * This case occurs when a simultaneous close
3486                          * happens, we must ack the received FIN and
3487                          * enter the CLOSING state.
3488                          *
3489                          * XXX timeout not set properly
3490                          */
3491 
3492                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3493                         tcp_set_state(sk,TCP_CLOSING);
3494                         break;
3495                 case TCP_FIN_WAIT2:
3496                         /*
3497                          * received a FIN -- send ACK and enter TIME_WAIT
3498                          */
3499                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3500                         sk->shutdown|=SHUTDOWN_MASK;
3501                         tcp_set_state(sk,TCP_TIME_WAIT);
3502                         break;
3503                 case TCP_CLOSE:
3504                         /*
3505                          * already in CLOSE
3506                          */
3507                         break;
3508                 default:
3509                         tcp_set_state(sk,TCP_LAST_ACK);
3510         
3511                         /* Start the timers. */
3512                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3513                         return(0);
3514         }
3515 /*      sk->ack_backlog++;     tcp_data has already dealt with ACK's */
3516 
3517         return(0);
3518 }
3519 
3520 
3521 /* This will accept the next outstanding connection. */
3522 static struct sock *
3523 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3524 {
3525         struct sock *newsk;
3526         struct sk_buff *skb;
3527   
3528   /*
3529    * We need to make sure that this socket is listening,
3530    * and that it has something pending.
3531    */
3532 
3533         if (sk->state != TCP_LISTEN) 
3534         {
3535                 sk->err = EINVAL;
3536                 return(NULL); 
3537         }
3538 
3539         /* Avoid the race. */
3540         cli();
3541         sk->inuse = 1;
3542 
3543         while((skb = tcp_dequeue_established(sk)) == NULL) 
3544         {
3545                 if (flags & O_NONBLOCK) 
3546                 {
3547                         sti();
3548                         release_sock(sk);
3549                         sk->err = EAGAIN;
3550                         return(NULL);
3551                 }
3552 
3553                 release_sock(sk);
3554                 interruptible_sleep_on(sk->sleep);
3555                 if (current->signal & ~current->blocked) 
3556                 {
3557                         sti();
3558                         sk->err = ERESTARTSYS;
3559                         return(NULL);
3560                 }
3561                 sk->inuse = 1;
3562         }
3563         sti();
3564 
3565         /*
3566          *      Now all we need to do is return skb->sk. 
3567          */
3568 
3569         newsk = skb->sk;
3570 
3571         kfree_skb(skb, FREE_READ);
3572         sk->ack_backlog--;
3573         release_sock(sk);
3574         return(newsk);
3575 }
3576 
3577 
3578 /*
3579  *      This will initiate an outgoing connection. 
3580  */
3581  
3582 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3583 {
3584         struct sk_buff *buff;
3585         struct device *dev=NULL;
3586         unsigned char *ptr;
3587         int tmp;
3588         int atype;
3589         struct tcphdr *t1;
3590         struct rtable *rt;
3591 
3592         if (sk->state != TCP_CLOSE) 
3593                 return(-EISCONN);
3594 
3595         if (addr_len < 8) 
3596                 return(-EINVAL);
3597 
3598         if (usin->sin_family && usin->sin_family != AF_INET) 
3599                 return(-EAFNOSUPPORT);
3600 
3601         /*
3602          *      connect() to INADDR_ANY means loopback (BSD'ism).
3603          */
3604         
3605         if(usin->sin_addr.s_addr==INADDR_ANY)
3606                 usin->sin_addr.s_addr=ip_my_addr();
3607                   
3608         /*
3609          *      Don't want a TCP connection going to a broadcast address 
3610          */
3611 
3612         if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
3613                 return -ENETUNREACH;
3614   
3615         sk->inuse = 1;
3616         sk->daddr = usin->sin_addr.s_addr;
3617         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3618         sk->window_seq = sk->write_seq;
3619         sk->rcv_ack_seq = sk->write_seq -1;
3620         sk->err = 0;
3621         sk->dummy_th.dest = usin->sin_port;
3622         release_sock(sk);
3623 
3624         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3625         if (buff == NULL) 
3626         {
3627                 return(-ENOMEM);
3628         }
3629         sk->inuse = 1;
3630         buff->len = 24;
3631         buff->sk = sk;
3632         buff->free = 1;
3633         buff->localroute = sk->localroute;
3634         
3635         t1 = (struct tcphdr *) buff->data;
3636 
3637         /*
3638          *      Put in the IP header and routing stuff. 
3639          */
3640          
3641         rt=ip_rt_route(sk->daddr, NULL, NULL);
3642         
3643 
3644         /*
3645          *      We need to build the routing stuff from the things saved in skb. 
3646          */
3647 
3648         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3649                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3650         if (tmp < 0) 
3651         {
3652                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3653                 release_sock(sk);
3654                 return(-ENETUNREACH);
3655         }
3656 
3657         buff->len += tmp;
3658         t1 = (struct tcphdr *)((char *)t1 +tmp);
3659 
3660         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3661         t1->seq = ntohl(sk->write_seq++);
3662         sk->sent_seq = sk->write_seq;
3663         buff->h.seq = sk->write_seq;
3664         t1->ack = 0;
3665         t1->window = 2;
3666         t1->res1=0;
3667         t1->res2=0;
3668         t1->rst = 0;
3669         t1->urg = 0;
3670         t1->psh = 0;
3671         t1->syn = 1;
3672         t1->urg_ptr = 0;
3673         t1->doff = 6;
3674         /* use 512 or whatever user asked for */
3675         
3676         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3677                 sk->window_clamp=rt->rt_window;
3678         else
3679                 sk->window_clamp=0;
3680 
3681         if (sk->user_mss)
3682                 sk->mtu = sk->user_mss;
3683         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3684                 sk->mtu = rt->rt_mss;
3685         else 
3686         {
3687 #ifdef CONFIG_INET_SNARL
3688                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3689 #else
3690                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3691 #endif
3692                         sk->mtu = 576 - HEADER_SIZE;
3693                 else
3694                         sk->mtu = MAX_WINDOW;
3695         }
3696         /*
3697          *      but not bigger than device MTU 
3698          */
3699 
3700         if(sk->mtu <32)
3701                 sk->mtu = 32;   /* Sanity limit */
3702                 
3703         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3704         
3705         /*
3706          *      Put in the TCP options to say MTU. 
3707          */
3708 
3709         ptr = (unsigned char *)(t1+1);
3710         ptr[0] = 2;
3711         ptr[1] = 4;
3712         ptr[2] = (sk->mtu) >> 8;
3713         ptr[3] = (sk->mtu) & 0xff;
3714         tcp_send_check(t1, sk->saddr, sk->daddr,
3715                   sizeof(struct tcphdr) + 4, sk);
3716 
3717         /*
3718          *      This must go first otherwise a really quick response will get reset. 
3719          */
3720 
3721         tcp_set_state(sk,TCP_SYN_SENT);
3722         sk->rto = TCP_TIMEOUT_INIT;
3723         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3724         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3725 
3726         sk->prot->queue_xmit(sk, dev, buff, 0);  
3727         tcp_statistics.TcpActiveOpens++;
3728         tcp_statistics.TcpOutSegs++;
3729   
3730         release_sock(sk);
3731         return(0);
3732 }
3733 
3734 
3735 /* This functions checks to see if the tcp header is actually acceptable. */
3736 static int
3737 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3738              struct options *opt, unsigned long saddr, struct device *dev)
3739 {
3740         unsigned long next_seq;
3741 
3742         next_seq = len - 4*th->doff;
3743         if (th->fin)
3744                 next_seq++;
3745         /* if we have a zero window, we can't have any data in the packet.. */
3746         if (next_seq && !sk->window)
3747                 goto ignore_it;
3748         next_seq += th->seq;
3749 
3750         /*
3751          * This isn't quite right.  sk->acked_seq could be more recent
3752          * than sk->window.  This is however close enough.  We will accept
3753          * slightly more packets than we should, but it should not cause
3754          * problems unless someone is trying to forge packets.
3755          */
3756 
3757         /* have we already seen all of this packet? */
3758         if (!after(next_seq+1, sk->acked_seq))
3759                 goto ignore_it;
3760         /* or does it start beyond the window? */
3761         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3762                 goto ignore_it;
3763 
3764         /* ok, at least part of this packet would seem interesting.. */
3765         return 1;
3766 
3767 ignore_it:
3768         if (th->rst)
3769                 return 0;
3770 
3771         /*
3772          *      Send a reset if we get something not ours and we are
3773          *      unsynchronized. Note: We don't do anything to our end. We
3774          *      are just killing the bogus remote connection then we will
3775          *      connect again and it will work (with luck).
3776          */
3777          
3778         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3779                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3780                 return 1;
3781         }
3782 
3783         /* Try to resync things. */
3784         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3785         return 0;
3786 }
3787 
3788 
3789 #ifdef TCP_FASTPATH
3790 /*
3791  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3792  *      Yes if
3793  *      a) The queue is empty
3794  *      b) The last frame on the queue has the acked flag set
3795  */
3796 
3797 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3798 {
3799         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3800         if(skb==NULL || sk->receive_queue.prev->acked)
3801                 return 1;
3802 }
3803 
3804 #endif
3805 
/*
 *      tcp_rcv - process one incoming TCP segment.
 *
 *      skb       buffer holding the segment; skb->h.th is taken as the TCP header.
 *      dev       receiving device; a NULL dev (or NULL skb) returns immediately.
 *      opt       passed through to tcp_reset(), tcp_sequence() and tcp_conn_request().
 *      daddr,
 *      saddr     address pair used for the socket lookup and the checksum
 *                (presumably the IP destination and source of the segment - confirm).
 *      len       length given to tcp_check() and stored in skb->len.
 *      redo      zero on first delivery.  When non-zero the checksum test, the
 *                "no socket" reset, the skb field setup and the back_log test
 *                are all skipped.
 *      protocol  not referenced anywhere in this function.
 *
 *      Every path returns 0.  Segments that are not kept are released with
 *      kfree_skb(skb, FREE_READ); once the socket has been claimed most exits
 *      call release_sock() first.
 */
3806 int
3807 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3808         unsigned long daddr, unsigned short len,
3809         unsigned long saddr, int redo, struct inet_protocol * protocol)
3810 {
3811         struct tcphdr *th;
3812         struct sock *sk;
3813 
3814         if (!skb) 
3815         {
3816                 return(0);
3817         }
3818 
3819         if (!dev) 
3820         {
3821                 return(0);
3822         }
3823   
3824         tcp_statistics.TcpInSegs++;
3825   
             /* Only frames marked PACKET_HOST are processed; anything else is dropped. */
3826         if(skb->pkt_type!=PACKET_HOST)
3827         {
3828                 kfree_skb(skb,FREE_READ);
3829                 return(0);
3830         }
3831   
3832         th = skb->h.th;
3833 
3834         /*
3835          *      Find the socket.
3836          */
3837 
3838         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3839 
3840         /*
3841          *      If this socket has got a reset its to all intents and purposes 
3842          *      really dead 
3843          */
3844          
3845         if (sk!=NULL && sk->zapped)
3846                 sk=NULL;
3847 
             /*
              * First delivery: validate the segment, convert th->seq to host
              * order, fill in the skb bookkeeping fields and claim the socket.
              */
3848         if (!redo) 
3849         {
                     /* A non-zero result from tcp_check() drops the segment (presumably a checksum failure). */
3850                 if (tcp_check(th, len, saddr, daddr )) 
3851                 {
3852                         skb->sk = NULL;
3853                         kfree_skb(skb,FREE_READ);
3854                         /*
3855                          * We don't release the socket because it was
3856                          * never marked in use.
3857                          */
3858                         return(0);
3859                 }
3860                 th->seq = ntohl(th->seq);
3861 
3862                 /* See if we know about the socket. */
3863                 if (sk == NULL) 
3864                 {
                             /* No usable socket: answer with a reset unless the segment is itself a reset. */
3865                         if (!th->rst)
3866                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3867                         skb->sk = NULL;
3868                         kfree_skb(skb, FREE_READ);
3869                         return(0);
3870                 }
3871 
3872                 skb->len = len;
3873                 skb->acked = 0;
3874                 skb->used = 0;
3875                 skb->free = 0;
                     /* Note the swap: the buffer's saddr receives daddr and vice versa. */
3876                 skb->saddr = daddr;
3877                 skb->daddr = saddr;
3878         
3879                 /* We may need to add it to the backlog here. */
                     /*
                      * The inuse test and the assignment below are bracketed by
                      * cli()/sti().  A socket that is already in use gets the
                      * buffer queued on back_log instead (presumably it is
                      * replayed later with redo set - confirm against the caller).
                      */
3880                 cli();
3881                 if (sk->inuse) 
3882                 {
3883                         skb_queue_tail(&sk->back_log, skb);
3884                         sti();
3885                         return(0);
3886                 }
3887                 sk->inuse = 1;
3888                 sti();
3889         }
3890         else
3891         {
3892                 if (!sk) 
3893                 {
3894                         return(0);
3895                 }
3896         }
3897 
3898 
             /*
              * NOTE(review): this return neither frees skb nor calls
              * release_sock(), although on the !redo path sk->inuse was set to 1
              * just above - confirm this is intended.
              */
3899         if (!sk->prot) 
3900         {
3901                 return(0);
3902         }
3903 
3904 
3905         /*
3906          *      Charge the memory to the socket. 
3907          */
3908          
             /* The segment is dropped if accounting it would reach or exceed sk->rcvbuf. */
3909         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3910         {
3911                 kfree_skb(skb, FREE_READ);
3912                 release_sock(sk);
3913                 return(0);
3914         }
3915 
3916         skb->sk=sk;
3917         sk->rmem_alloc += skb->mem_len;
3918 
3919 #ifdef TCP_FASTPATH
3920 /*
3921  *      Incoming data stream fastpath. 
3922  *
3923  *      We try to optimise two things.
3924  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3925  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3926  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3927  *
3928  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3929  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3930  *      speed although further optimizing here is possible.
3931  */
3932  
3933         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3934         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3935         {       
3936                 /* Packets in order. Fits window */
3937                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3938                 {
3939                         /* Ack is harder */
3940                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3941                         {
3942                                 kfree_skb(skb, FREE_READ);
3943                                 release_sock(sk);
3944                                 return 0;
3945                         }
3946                         /*
3947                          *      Set up variables
3948                          */
                             /* From here on skb->len counts payload only (TCP header of doff*4 bytes removed). */
3949                         skb->len -= (th->doff *4);
3950                         sk->bytes_rcv += skb->len;
3951                         tcp_rx_hit2++;
                             /* A segment without payload is not queued here; it drops into the generic case below. */
3952                         if(skb->len)
3953                         {
3954                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3955                                 if(sk->window >= skb->len)
3956                                         sk->window-=skb->len;                   /* We know its effect on the window */
3957                                 else
3958                                         sk->window=0;
3959                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3960                                 skb->acked=1;                           /* Guaranteed true */
                                     /* Acknowledge at once, or count a delayed ack and arm the TIME_WRITE timer. */
3961                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3962                                         sk->bytes_rcv > sk->max_unacked)
3963                                 {
3964                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3965                                 }
3966                                 else
3967                                 {
3968                                         sk->ack_backlog++;
3969                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3970                                 }
3971                                 if(!sk->dead)
3972                                         sk->data_ready(sk,0);
3973                                 release_sock(sk);
3974                                 return 0;
3975                         }
3976                 }
3977                 /*
3978                  *      More generic case of arriving data stream in ESTABLISHED
3979                  */
3980                 tcp_rx_hit1++;
3981                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3982                 {
3983                         kfree_skb(skb, FREE_READ);
3984                         release_sock(sk);
3985                         return 0;
3986                 }
3987                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3988                 {
3989                         kfree_skb(skb, FREE_READ);
3990                         release_sock(sk);
3991                         return 0;
3992                 }
                     /* A non-zero result from tcp_data() means the buffer is freed here. */
3993                 if(tcp_data(skb, sk, saddr, len))
3994                         kfree_skb(skb, FREE_READ);
3995                 release_sock(sk);
3996                 return 0;
3997         }
3998         tcp_rx_miss++;
3999 #endif  
4000 
4001         /*
4002          *      Now deal with all cases.
4003          */
4004          
4005         switch(sk->state) 
4006         {
4007         
4008                 /*
4009                  * This should close the system down if it's waiting
4010                  * for an ack that is never going to be sent.
4011                  */
4012                 case TCP_LAST_ACK:
4013                         if (th->rst) 
4014                         {
4015                                 sk->zapped=1;
4016                                 sk->err = ECONNRESET;
4017                                 tcp_set_state(sk,TCP_CLOSE);
4018                                 sk->shutdown = SHUTDOWN_MASK;
4019                                 if (!sk->dead) 
4020                                 {
4021                                         sk->state_change(sk);
4022                                 }
4023                                 kfree_skb(skb, FREE_READ);
4024                                 release_sock(sk);
4025                                 return(0);
4026                         }
4027 
                     /* No RST in LAST_ACK: execution falls through into the shared handling below. */
4028                 case TCP_ESTABLISHED:
4029                 case TCP_CLOSE_WAIT:
4030                 case TCP_CLOSING:
4031                 case TCP_FIN_WAIT1:
4032                 case TCP_FIN_WAIT2:
4033                 case TCP_TIME_WAIT:
4034 
4035                         /*
4036                          * is it a good packet?
4037                          */
4038 
4039                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4040                         {
4041                                 kfree_skb(skb, FREE_READ);
4042                                 release_sock(sk);
4043                                 return(0);
4044                         }
4045 
4046                         if (th->rst) 
4047                         {
4048                                 if(sk->state!=TCP_TIME_WAIT)    /* RFC 1337 recommendation re RST in time wait */
4049                                 {
4050                                         tcp_statistics.TcpEstabResets++;
4051                                         sk->zapped=1;
4052                                         /* This means the thing should really be closed. */
4053                                         sk->err = ECONNRESET;
4054                                         if (sk->state == TCP_CLOSE_WAIT) 
4055                                         {
4056                                                 sk->err = EPIPE;
4057                                         }
4058                                         tcp_set_state(sk,TCP_CLOSE);
4059                                         sk->shutdown = SHUTDOWN_MASK;
4060                                         if (!sk->dead) 
4061                                         {
4062                                                 sk->state_change(sk);
4063                                         }
4064                                 }
4065                                 kfree_skb(skb, FREE_READ);
4066                                 release_sock(sk);
4067                                 return(0);
4068                         }
4069                         if (th->syn) 
4070                         {
                                     /* write_seq and state are saved first because the socket is closed below. */
4071                                 long seq=sk->write_seq;
4072                                 int st=sk->state;
4073                                 tcp_statistics.TcpEstabResets++;
4074                                 sk->err = ECONNRESET;
4075                                 tcp_set_state(sk,TCP_CLOSE);
4076                                 sk->shutdown = SHUTDOWN_MASK;
4077                                 if(sk->debug)
4078                                         printk("Socket %p reset by SYN while established.\n", sk);
4079                                 if (!sk->dead) {
4080                                         sk->state_change(sk);
4081                                 }
4082                                 /*
4083                                  *      The BSD port reuse protocol violation.
4084                                  *      I do sometimes wonder how the *bsd people
4085                                  *      have the nerve to talk about 'standards'.
4086                                  *
4087                                  *      If seq > last used on connection then
4088                                  *      open a new connection and use 128000+seq of
4089                                  *      old connection.
4090                                  *
4091                                  */
4092                                  
4093                                 if(st==TCP_TIME_WAIT && th->seq > sk->acked_seq && sk->dead)
4094                                 {
4095                                         struct sock *psk=sk;
4096                                         /*
4097                                          *      Find the listening socket.
4098                                          */
                                             /*
                                              * NOTE(review): the port/address arguments are in the
                                              * opposite order to the get_sock() call at the top of
                                              * this function - confirm this is intended.
                                              */
4099                                         sk=get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
4100                                         if(sk && sk->state==TCP_LISTEN)
4101                                         {
                                                     /* From here on sk is the listening socket; psk (the old one) is released. */
4102                                                 sk->inuse=1;
4103                                                 tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
4104                                                 release_sock(psk);
4105                                                 /* Fall through in case people are
4106                                                    also using the piggy backed SYN + data 
4107                                                    protocol violation */
4108                                         }
4109                                         else
4110                                         {
4111                                                 tcp_reset(daddr, saddr,  th, psk->prot, opt,dev, psk->ip_tos,psk->ip_ttl);
4112                                                 release_sock(psk);
4113                                                 kfree_skb(skb, FREE_READ);
4114                                                 return 0;
4115                                         }                       
4116                                 }
4117                                 else
4118                                 {
4119                                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4120                                         kfree_skb(skb, FREE_READ);
4121                                         release_sock(sk);
4122                                         return(0);
4123                                 }
4124                         }       
                             /* Remaining steps in order: ack, urgent data, payload, then fin. */
4125                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4126                                 kfree_skb(skb, FREE_READ);
4127                                 release_sock(sk);
4128                                 return(0);
4129                         }
4130         
4131                         if (tcp_urg(sk, th, saddr, len)) {
4132                                 kfree_skb(skb, FREE_READ);
4133                                 release_sock(sk);
4134                                 return(0);
4135                         }
4136 
4137         
4138                         if (tcp_data(skb, sk, saddr, len)) {
4139                                 kfree_skb(skb, FREE_READ);
4140                                 release_sock(sk);
4141                                 return(0);
4142                         }       
4143 
4144                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4145                                 kfree_skb(skb, FREE_READ);
4146                                 release_sock(sk);
4147                                 return(0);
4148                         }
4149         
4150                         release_sock(sk);
4151                         return(0);
4152 
4153 
4154                 case TCP_CLOSE:
                             /* A dead socket, or one with a remote address set, drops the segment silently. */
4155                         if (sk->dead || sk->daddr) {
4156                                 kfree_skb(skb, FREE_READ);
4157                                         release_sock(sk);
4158                                 return(0);
4159                         }
4160         
4161                         if (!th->rst) {
4162                                 if (!th->ack)
4163                                         th->ack_seq = 0;
4164                                 if(sk->debug) printk("Reset on closed socket %d.\n",sk->blog);
4165                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4166                         }
4167                         kfree_skb(skb, FREE_READ);
4168                         release_sock(sk);
4169                                 return(0);
4170         
4171                 case TCP_LISTEN:
                             /* Listening socket: RST is ignored, ACK draws a reset, SYN starts a connection request. */
4172                         if (th->rst) {
4173                                 kfree_skb(skb, FREE_READ);
4174                                 release_sock(sk);
4175                                 return(0);
4176                         }
4177                         if (th->ack) {
4178                                 if(sk->debug) printk("Reset on listening socket %d.\n",sk->blog);
4179                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4180                                 kfree_skb(skb, FREE_READ);
4181                                 release_sock(sk);
4182                                 return(0);
4183                         }
4184         
4185                         if (th->syn) 
4186                         {
4187                                 /*
4188                                  * Now we just put the whole thing including
4189                                  * the header and saddr, and protocol pointer
4190                                  * into the buffer.  We can't respond until the
4191                                  * user tells us to accept the connection.
4192                                  */
                                     /* The skb is not freed on this path; it is handed to tcp_conn_request(). */
4193                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
4194                                 release_sock(sk);
4195                                 return(0);
4196                         }
4197 
4198                         kfree_skb(skb, FREE_READ);
4199                         release_sock(sk);
4200                         return(0);
4201 
4202                 case TCP_SYN_RECV:
4203                         if (th->syn) {
4204                                 /* Probably a retransmitted syn */
4205                                 kfree_skb(skb, FREE_READ);
4206                                 release_sock(sk);
4207                                 return(0);
4208                         }
4209         
4210         
                     /* SYN_RECV without SYN falls through to the sequence check under default. */
4211                 default:
4212                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4213                         {
4214                                 kfree_skb(skb, FREE_READ);
4215                                 release_sock(sk);
4216                                 return(0);
4217                         }
4218         
                     /* Falls through: SYN_SENT shares the code below but skips the sequence check above. */
4219                 case TCP_SYN_SENT:
4220                         if (th->rst) 
4221                         {
4222                                 tcp_statistics.TcpAttemptFails++;
4223                                 sk->err = ECONNREFUSED;
4224                                 tcp_set_state(sk,TCP_CLOSE);
4225                                 sk->shutdown = SHUTDOWN_MASK;
4226                                 sk->zapped = 1;
4227                                 if (!sk->dead) 
4228                                 {
4229                                         sk->state_change(sk);
4230                                 }
4231                                 kfree_skb(skb, FREE_READ);
4232                                 release_sock(sk);
4233                                 return(0);
4234                         }
4235                         if (!th->ack) 
4236                         {
4237                                 if (th->syn) 
4238                                 {
4239                                         /* Crossed SYN's are fine - but talking to
4240                                            yourself is right out... */
4241                                         if(sk->saddr==saddr && sk->daddr==daddr &&
4242                                                 sk->dummy_th.source==th->source &&
4243                                                 sk->dummy_th.dest==th->dest)
4244                                         {
4245                                                 tcp_statistics.TcpAttemptFails++;
4246                                                 sk->err = ECONNREFUSED;
4247                                                 tcp_set_state(sk,TCP_CLOSE);
4248                                                 sk->shutdown = SHUTDOWN_MASK;
4249                                                 sk->zapped = 1;
4250                                                 if (!sk->dead) 
4251                                                 {
4252                                                         sk->state_change(sk);
4253                                                 }
4254                                                 kfree_skb(skb, FREE_READ);
4255                                                 release_sock(sk);
4256                                                 return(0);
4257                                         }
4258                                         tcp_set_state(sk,TCP_SYN_RECV);
4259                                 }
4260                                 kfree_skb(skb, FREE_READ);
4261                                 release_sock(sk);
4262                                 return(0);
4263                         }
4264         
                             /*
                              * The segment carries an ACK.  Only SYN_SENT and SYN_RECV are
                              * handled by the inner switch (it has no default); any other
                              * state continues with the urg/data/fin handling after it.
                              */
4265                         switch(sk->state) 
4266                         {
4267                                 case TCP_SYN_SENT:
4268                                         if (!tcp_ack(sk, th, saddr, len)) 
4269                                         {
4270                                                 tcp_statistics.TcpAttemptFails++;
4271                                                 tcp_reset(daddr, saddr, th,
4272                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4273                                                 kfree_skb(skb, FREE_READ);
4274                                                         release_sock(sk);
4275                                                 return(0);
4276                                         }
4277         
4278                                         /*
4279                                          * If the syn bit is also set, switch to
4280                                          * tcp_syn_recv, and then to established.
4281                                          */
4282                                         if (!th->syn) 
4283                                         {
4284                                                 kfree_skb(skb, FREE_READ);
4285                                                 release_sock(sk);
4286                                                 return(0);
4287                                         }
4288         
4289                                         /* Ack the syn and fall through. */
4290                                         sk->acked_seq = th->seq+1;
4291                                         sk->fin_seq = th->seq;
4292                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4293                                                 sk, th, sk->daddr);
4294                 
4295                                 case TCP_SYN_RECV:
                                             /* On the fall-through from SYN_SENT this is the second tcp_ack() call for the segment. */
4296                                         if (!tcp_ack(sk, th, saddr, len)) 
4297                                         {
4298                                                 tcp_statistics.TcpAttemptFails++;
4299                                                 tcp_reset(daddr, saddr, th,
4300                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4301                                                 kfree_skb(skb, FREE_READ);
4302                                                 release_sock(sk);
4303                                                 return(0);
4304                                         }
4305         
4306                                         tcp_set_state(sk,TCP_ESTABLISHED);
4307         
4308                                         /*
4309                                          *      Now we need to finish filling out
4310                                          *      some of the tcp header.
4311                                          * 
4312                                          *      We need to check for mtu info. 
4313                                          */
4314                                         tcp_options(sk, th);
4315                                         sk->dummy_th.dest = th->source;
4316                                         sk->copied_seq = sk->acked_seq-1;
4317                                         if (!sk->dead) 
4318                                         {
4319                                                 sk->state_change(sk);
4320                                         }
4321         
4322                                         /*
4323                                          * We've already processed his first
4324                                          * ack.  In just about all cases that
4325                                          * will have set max_window.  This is
4326                                          * to protect us against the possibility
4327                                          * that the initial window he sent was 0.
4328                                          * This must occur after tcp_options, which
4329                                          * sets sk->mtu.
4330                                          */
4331                                         if (sk->max_window == 0) 
4332                                         {
4333                                                 sk->max_window = 32;
4334                                                 sk->mss = min(sk->max_window, sk->mtu);
4335                                         }
4336 
4337                                         /*
4338                                          * Now process the rest like we were
4339                                          * already in the established state.
4340                                          */
4341                                         if (th->urg) 
4342                                         {
4343                                                 if (tcp_urg(sk, th, saddr, len)) 
4344                                                 { 
4345                                                         kfree_skb(skb, FREE_READ);
4346                                                         release_sock(sk);
4347                                                         return(0);
4348                                                 }
4349                                         }
                                             /*
                                              * NOTE(review): when tcp_data() returns non-zero the skb is
                                              * freed here, yet th->fin is still tested and skb is still
                                              * passed to tcp_fin() just below - confirm this is safe.
                                              */
4350                                         if (tcp_data(skb, sk, saddr, len))
4351                                                 kfree_skb(skb, FREE_READ);
4352 
4353                                         if (th->fin)
4354                                                 tcp_fin(skb, sk, th, saddr, dev);
4355                                         release_sock(sk);
4356                                         return(0);
4357                         }
4358         
                             /* Reached only when the inner switch matched no case. */
4359                         if (th->urg) 
4360                         {
4361                                 if (tcp_urg(sk, th, saddr, len)) 
4362                                 {
4363                                         kfree_skb(skb, FREE_READ);
4364                                         release_sock(sk);
4365                                         return(0);
4366                                 }
4367                         }
4368                         if (tcp_data(skb, sk, saddr, len)) 
4369                         {
4370                                 kfree_skb(skb, FREE_READ);
4371                                 release_sock(sk);
4372                                 return(0);
4373                         }
4374         
4375                         if (!th->fin) 
4376                         {
4377                                 release_sock(sk);
4378                                 return(0);
4379                         }
4380                         tcp_fin(skb, sk, th, saddr, dev);
4381                         release_sock(sk);
4382                         return(0);
4383         }
4384 }
4385 
4386 
4387 /*
4388  * This routine sends a packet with an out of date sequence
4389  * number. It assumes the other end will try to ack it.
4390  */
4391 
4392 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4393 {
4394         struct sk_buff *buff;
4395         struct tcphdr *t1;
4396         struct device *dev=NULL;
4397         int tmp;
4398 
4399         if (sk->zapped)
4400                 return; /* After a valid reset we can send no more */
4401 
4402         /*
4403          * Write data can still be transmitted/retransmitted in the
4404          * following states.  If any other state is encountered, return.
4405          */
4406 
4407         if (sk->state != TCP_ESTABLISHED && 
4408             sk->state != TCP_CLOSE_WAIT &&
4409             sk->state != TCP_FIN_WAIT1 && 
4410             sk->state != TCP_LAST_ACK &&
4411             sk->state != TCP_CLOSING
4412         ) {
4413                 return;
4414         }
4415 
4416         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4417         if (buff == NULL) 
4418                 return;
4419 
4420         buff->len = sizeof(struct tcphdr);
4421         buff->free = 1;
4422         buff->sk = sk;
4423         buff->localroute = sk->localroute;
4424 
4425         t1 = (struct tcphdr *) buff->data;
4426 
4427         /* Put in the IP header and routing stuff. */
4428         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4429                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4430         if (tmp < 0) 
4431         {
4432                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4433                 return;
4434         }
4435 
4436         buff->len += tmp;
4437         t1 = (struct tcphdr *)((char *)t1 +tmp);
4438 
4439         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4440 
4441         /*
4442          * Use a previous sequence.
4443          * This should cause the other end to send an ack.
4444          */
4445         t1->seq = htonl(sk->sent_seq-1);
4446         t1->ack = 1; 
4447         t1->res1= 0;
4448         t1->res2= 0;
4449         t1->rst = 0;
4450         t1->urg = 0;
4451         t1->psh = 0;
4452         t1->fin = 0;
4453         t1->syn = 0;
4454         t1->ack_seq = ntohl(sk->acked_seq);
4455         t1->window = ntohs(tcp_select_window(sk));
4456         t1->doff = sizeof(*t1)/4;
4457         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4458 
4459          /*     Send it and free it.
4460           *     This will prevent the timer from automatically being restarted.
4461           */
4462         sk->prot->queue_xmit(sk, dev, buff, 1);
4463         tcp_statistics.TcpOutSegs++;
4464 }
4465 
4466 void
4467 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4468 {
4469         if (sk->zapped)
4470                 return;         /* After a valid reset we can send no more */
4471 
4472         tcp_write_wakeup(sk);
4473 
4474         sk->backoff++;
4475         sk->rto = min(sk->rto << 1, 120*HZ);
4476         reset_timer (sk, TIME_PROBE0, sk->rto);
4477         sk->retransmits++;
4478         sk->prot->retransmits ++;
4479 }
4480 
4481 /*
4482  *      Socket option code for TCP. 
4483  */
4484   
4485 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4486 {
4487         int val,err;
4488 
4489         if(level!=SOL_TCP)
4490                 return ip_setsockopt(sk,level,optname,optval,optlen);
4491 
4492         if (optval == NULL) 
4493                 return(-EINVAL);
4494 
4495         err=verify_area(VERIFY_READ, optval, sizeof(int));
4496         if(err)
4497                 return err;
4498         
4499         val = get_fs_long((unsigned long *)optval);
4500 
4501         switch(optname)
4502         {
4503                 case TCP_MAXSEG:
4504 /*
4505  * values greater than interface MTU won't take effect.  however at
4506  * the point when this call is done we typically don't yet know
4507  * which interface is going to be used
4508  */
4509                         if(val<1||val>MAX_WINDOW)
4510                                 return -EINVAL;
4511                         sk->user_mss=val;
4512                         return 0;
4513                 case TCP_NODELAY:
4514                         sk->nonagle=(val==0)?0:1;
4515                         return 0;
4516                 default:
4517                         return(-ENOPROTOOPT);
4518         }
4519 }
4520 
4521 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4522 {
4523         int val,err;
4524 
4525         if(level!=SOL_TCP)
4526                 return ip_getsockopt(sk,level,optname,optval,optlen);
4527                         
4528         switch(optname)
4529         {
4530                 case TCP_MAXSEG:
4531                         val=sk->user_mss;
4532                         break;
4533                 case TCP_NODELAY:
4534                         val=sk->nonagle;
4535                         break;
4536                 default:
4537                         return(-ENOPROTOOPT);
4538         }
4539         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4540         if(err)
4541                 return err;
4542         put_fs_long(sizeof(int),(unsigned long *) optlen);
4543 
4544         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4545         if(err)
4546                 return err;
4547         put_fs_long(val,(unsigned long *)optval);
4548 
4549         return(0);
4550 }       
4551 
4552 
/*
 *      Protocol operations table for TCP.
 *
 *      This is a positional initializer, so every entry must stay in the
 *      order in which struct proto declares its members.  That declaration
 *      is not part of this file section.  Code here reaches wmalloc, wfree,
 *      build_header, queue_xmit and retransmits through sk->prot; which
 *      slot each of them occupies is presumably as the entry names
 *      suggest - TODO confirm against the struct proto declaration.
 */
4553 struct proto tcp_prot = {
             /* Generic socket buffer helpers (sock_*). */
4554         sock_wmalloc,
4555         sock_rmalloc,
4556         sock_wfree,
4557         sock_rfree,
4558         sock_rspace,
4559         sock_wspace,
             /* TCP entry points, with two IP-layer routines in between. */
4560         tcp_close,
4561         tcp_read,
4562         tcp_write,
4563         tcp_sendto,
4564         tcp_recvfrom,
4565         ip_build_header,
4566         tcp_connect,
4567         tcp_accept,
4568         ip_queue_xmit,
4569         tcp_retransmit,
4570         tcp_write_wakeup,
4571         tcp_read_wakeup,
4572         tcp_rcv,
4573         tcp_select,
4574         tcp_ioctl,
             /* This slot is deliberately left empty for TCP. */
4575         NULL,
4576         tcp_shutdown,
4577         tcp_setsockopt,
4578         tcp_getsockopt,
             /* NOTE(review): the meaning of the remaining values depends on the struct proto layout - confirm. */
4579         128,
4580         0,
4581         {NULL,},
4582         "TCP"
4583 };

/* [previous][next][first][last][top][bottom][index][help] */