root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. min
  2. __print_th
  3. print_th
  4. tcp_select_window
  5. tcp_time_wait
  6. tcp_retransmit
  7. tcp_err
  8. tcp_readable
  9. tcp_select
  10. tcp_ioctl
  11. tcp_check
  12. tcp_send_check
  13. tcp_send_skb
  14. tcp_dequeue_partial
  15. tcp_send_partial
  16. tcp_enqueue_partial
  17. tcp_send_ack
  18. tcp_build_header
  19. tcp_write
  20. tcp_sendto
  21. tcp_read_wakeup
  22. cleanup_rbuf
  23. tcp_read_urg
  24. tcp_read
  25. tcp_shutdown
  26. tcp_recvfrom
  27. tcp_reset
  28. tcp_options
  29. default_mask
  30. tcp_conn_request
  31. tcp_close
  32. tcp_write_xmit
  33. sort_send
  34. tcp_ack
  35. tcp_data
  36. tcp_check_urg
  37. tcp_urg
  38. tcp_fin
  39. tcp_accept
  40. tcp_connect
  41. tcp_sequence
  42. tcp_rcv
  43. tcp_write_wakeup
  44. tcp_send_probe0
  45. tcp_setsockopt
  46. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *
  20  * Fixes:       
  21  *              Alan Cox        :       Numerous verify_area() calls
  22  *              Alan Cox        :       Set the ACK bit on a reset
  23  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  24  *                                      and was trying to connect (tcp_err()).
  25  *              Alan Cox        :       All icmp error handling was broken
   26  *                                      pointers passed were wrong and the
  27  *                                      socket was looked up backwards. Nobody
  28  *                                      tested any icmp error code obviously.
  29  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  30  *                                      on errors. select behaves and the icmp error race
  31  *                                      has gone by moving it into sock.c
  32  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  33  *                                      packets for unknown sockets.
  34  *              Alan Cox        :       tcp option processing.
  35  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  36  *              Herp Rosmanith  :       More reset fixes
  37  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  38  *                                      any kind of RST is right out.
  39  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  40  *                                      otherwise odd bits of prattle escape still
  41  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  42  *                                      LAN workplace lockups.
  43  *              Alan Cox        :       Some tidyups using the new skb list facilities
  44  *              Alan Cox        :       sk->keepopen now seems to work
  45  *              Alan Cox        :       Pulls options out correctly on accepts
  46  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  47  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  48  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
   49  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  50  *              Alan Cox        :       Removed incorrect check for 20 * psh
  51  *      Michael O'Reilly        :       ack < copied bug fix.
  52  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  53  *              Alan Cox        :       FIN with no memory -> CRASH
  54  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  55  *              Alan Cox        :       Added TCP options (SOL_TCP)
  56  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  57  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  58  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  59  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  60  *              Alan Cox        :       Put in missing check for SYN bit.
  61  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  62  *                                      window non shrink trick.
  63  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  64  *              Charles Hedrick :       TCP fixes
  65  *              Toomas Tamm     :       TCP window fixes
  66  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  67  *              Charles Hedrick :       Rewrote most of it to actually work
  68  *              Linus           :       Rewrote tcp_read() and URG handling
  69  *                                      completely
  70  *              Gerhard Koerting:       Fixed some missing timer handling
  71  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  72  *
  73  *
  74  * To Fix:
  75  *                      Possibly a problem with accept(). BSD accept never fails after
  76  *              it causes a select. Linux can - given the official select semantics I
  77  *              feel that _really_ its the BSD network programs that are bust (notably
  78  *              inetd, which hangs occasionally because of this).
  79  *                      Protocol closedown badly messed up.
  80  *
  81  *              This program is free software; you can redistribute it and/or
  82  *              modify it under the terms of the GNU General Public License
  83  *              as published by the Free Software Foundation; either version
  84  *              2 of the License, or(at your option) any later version.
  85  *
  86  * Description of States:
  87  *
  88  *      TCP_SYN_SENT            sent a connection request, waiting for ack
  89  *
  90  *      TCP_SYN_RECV            received a connection request, sent ack,
  91  *                              waiting for final ack in three-way handshake.
  92  *
  93  *      TCP_ESTABLISHED         connection established
  94  *
  95  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
  96  *                              transmission of remaining buffered data
  97  *
  98  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
  99  *                              to shutdown
 100  *
 101  *      TCP_CLOSING             both sides have shutdown but we still have
 102  *                              data we have to finish sending
 103  *
 104  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 105  *                              closed, can only be entered from FIN_WAIT2
 106  *                              or CLOSING.  Required because the other end
 107  *                              may not have gotten our last ACK causing it
 108  *                              to retransmit the data packet (which we ignore)
 109  *
 110  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 111  *                              us to finish writing our data and to shutdown
 112  *                              (we have to close() to move on to LAST_ACK)
 113  *
  114  *      TCP_LAST_ACK            our side has shutdown after remote has
 115  *                              shutdown.  There may still be data in our
 116  *                              buffer that we have to finish sending
 117  *              
 118  *      TCP_CLOSED              socket is finished
 119  */
 120 #include <linux/types.h>
 121 #include <linux/sched.h>
 122 #include <linux/mm.h>
 123 #include <linux/string.h>
 124 #include <linux/socket.h>
 125 #include <linux/sockios.h>
 126 #include <linux/termios.h>
 127 #include <linux/in.h>
 128 #include <linux/fcntl.h>
 129 #include <linux/inet.h>
 130 #include <linux/netdevice.h>
 131 #include "snmp.h"
 132 #include "ip.h"
 133 #include "protocol.h"
 134 #include "icmp.h"
 135 #include "tcp.h"
 136 #include <linux/skbuff.h>
 137 #include "sock.h"
 138 #include <linux/errno.h>
 139 #include <linux/timer.h>
 140 #include <asm/system.h>
 141 #include <asm/segment.h>
 142 #include <linux/mm.h>
 143 
  144 #define SEQ_TICK 3      /* NOTE(review): not referenced in the visible part of this file */
  145 unsigned long seq_offset;       /* NOTE(review): not referenced in the visible part of this file */
  146 struct tcp_mib  tcp_statistics; /* TCP counters; TcpAttemptFails and TcpOutSegs are incremented in this file */
 147 
 148 
 149 static __inline__ int 
 150 min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 151 {
 152   if (a < b) return(a);
 153   return(b);
 154 }
 155 
 156 
 157 static void __print_th(struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
 158 {
 159         unsigned char *ptr;
 160 
 161         printk("TCP header:\n");
 162         printk("    source=%d, dest=%d, seq =%ld, ack_seq = %ld\n",
 163                 ntohs(th->source), ntohs(th->dest),
 164                 ntohl(th->seq), ntohl(th->ack_seq));
 165         printk("    fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n",
 166                 th->fin, th->syn, th->rst, th->psh, th->ack,
 167                 th->urg, th->res1, th->res2);
 168         printk("    window = %d, check = %d urg_ptr = %d\n",
 169                 ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr));
 170         printk("    doff = %d\n", th->doff);
 171         ptr =(unsigned char *)(th + 1);
 172         printk("    options = %d %d %d %d\n", ptr[0], ptr[1], ptr[2], ptr[3]);
 173 }
 174 
 175 static inline void print_th(struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
 176 {
 177         if (inet_debug == DBG_TCP)
 178                 __print_th(th);
 179 }
 180 
 181 
  182 /* This routine picks a TCP window for a socket based on
 183    the following constraints
 184    
 185    1. The window can never be shrunk once it is offered (RFC 793)
 186    2. We limit memory per socket
 187    
 188    For now we use NET2E3's heuristic of offering half the memory
 189    we have handy. All is not as bad as this seems however because
 190    of two things. Firstly we will bin packets even within the window
 191    in order to get the data we are waiting for into the memory limit.
 192    Secondly we bin common duplicate forms at receive time
 193 
 194    Better heuristics welcome
 195 */
 196    
 197 static int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 198 {
 199         int new_window = sk->prot->rspace(sk);
 200 
 201 /*
 202  * two things are going on here.  First, we don't ever offer a
 203  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 204  * receiver side of SWS as specified in RFC1122.
 205  * Second, we always give them at least the window they
 206  * had before, in order to avoid retracting window.  This
 207  * is technically allowed, but RFC1122 advises against it and
 208  * in practice it causes trouble.
 209  */
 210         if (new_window < min(sk->mss, MAX_WINDOW/2) ||
 211             new_window < sk->window)
 212           return(sk->window);
 213         return(new_window);
 214 }
 215 
 216 /* Enter the time wait state. */
 217 
 218 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 219 {
 220   sk->state = TCP_TIME_WAIT;
 221   sk->shutdown = SHUTDOWN_MASK;
 222   if (!sk->dead)
 223         sk->state_change(sk);
 224   reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 225 }
 226 
 227 /*
  228  *      A timer event has triggered a tcp retransmit timeout. The
 229  *      socket xmit queue is ready and set up to send. Because
 230  *      the ack receive code keeps the queue straight we do
 231  *      nothing clever here.
 232  */
 233 
 234 static void
 235 tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 236 {
 237   if (all) {
 238         ip_retransmit(sk, all);
 239         return;
 240   }
 241 
 242   sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 243   /* sk->ssthresh in theory can be zero.  I guess that's OK */
 244   sk->cong_count = 0;
 245 
 246   sk->cong_window = 1;
 247 
 248   /* Do the actual retransmit. */
 249   ip_retransmit(sk, all);
 250 }
 251 
 252 
 253 /*
 254  * This routine is called by the ICMP module when it gets some
 255  * sort of error condition.  If err < 0 then the socket should
 256  * be closed and the error returned to the user.  If err > 0
 257  * it's just the icmp type << 8 | icmp code.  After adjustment
 258  * header points to the first 8 bytes of the tcp header.  We need
 259  * to find the appropriate port.
 260  */
 261 void
 262 tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 263         unsigned long saddr, struct inet_protocol *protocol)
 264 {
 265   struct tcphdr *th;
 266   struct sock *sk;
 267   struct iphdr *iph=(struct iphdr *)header;
 268   
 269   header+=4*iph->ihl;
 270    
 271   DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n",
 272                                         err, header, daddr, saddr, protocol));
 273 
 274   th =(struct tcphdr *)header;
 275   sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 276   print_th(th);
 277 
 278   if (sk == NULL) return;
 279   
 280   if(err<0)
 281   {
 282         sk->err = -err;
 283         sk->error_report(sk);
 284         return;
 285   }
 286 
 287   if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
 288         /*
 289          * FIXME:
 290          * For now we will just trigger a linear backoff.
 291          * The slow start code should cause a real backoff here.
 292          */
 293         if (sk->cong_window > 4) sk->cong_window--;
 294         return;
 295   }
 296 
 297   DPRINTF((DBG_TCP, "TCP: icmp_err got error\n"));
 298   sk->err = icmp_err_convert[err & 0xff].errno;
 299 
 300   /*
 301    * If we've already connected we will keep trying
 302    * until we time out, or the user gives up.
 303    */
 304   if (icmp_err_convert[err & 0xff].fatal) {
 305         if (sk->state == TCP_SYN_SENT) {
 306                 tcp_statistics.TcpAttemptFails++;
 307                 sk->state = TCP_CLOSE;
 308                 sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 309         }
 310   }
 311   return;
 312 }
 313 
 314 
 315 /*
 316  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 317  *      in the received data queue (ie a frame missing that needs sending to us)
 318  */
 319 
  320 static int
  321 tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Returns the number of bytes counted as readable on sk, or 0 when
      * sk is NULL or its receive queue is empty.  The queue is examined
      * between cli() and restore_flags(flags).
      */
  322 {
  323   unsigned long counted;
  324   unsigned long amount;
  325   struct sk_buff *skb;
  326   int sum;
  327   unsigned long flags;
  328 
  329   DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk));
  330   if(sk && sk->debug)
  331         printk("tcp_readable: %p - ",sk);
  332 
  333   save_flags(flags);
  334   cli();
  335   if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
  336   {
  337         restore_flags(flags);
  338         if(sk && sk->debug) 
  339                 printk("empty\n");
  340         return(0);
  341   }
  342   
  343   counted = sk->copied_seq+1;   /* Where we are at the moment */
  344   amount = 0;
  345   
  346   /* Do until a push or until we are out of data. */
  347   do {
  348         if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
  349                 break;
  350         sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
              /* A SYN is added to sum so that 'counted' steps over it ... */
  351         if (skb->h.th->syn)
  352                 sum++;
              /* A negative sum means this buffer lies entirely before 'counted'; it adds nothing. */
  353         if (sum >= 0) {                                 /* Add it up, move on */
  354                 amount += sum;
                      /* ... but it is taken back out of the byte total. */
  355                 if (skb->h.th->syn) amount--;
  356                 counted += sum;
  357         }
              /* Stop at a pushed segment once at least one byte has been counted. */
  358         if (amount && skb->h.th->psh) break;
  359         skb = skb->next;
              /* The list is circular: getting back to the queue head ends the walk. */
  360   } while(skb != (struct sk_buff *)&sk->receive_queue);
        /* With urgent data pending and not delivered inline, drop one byte
           from the total when urg_seq lies within the range just counted. */
  361   if (amount && !sk->urginline && sk->urg_data &&
  362       (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
  363         amount--;               /* don't count urg data */
  364   restore_flags(flags);
  365   DPRINTF((DBG_TCP, "tcp readable returning %d bytes\n", amount));
  366   if(sk->debug)
  367         printk("got %lu bytes.\n",amount);
  368   return(amount);
  369 }
 370 
 371 
 372 /*
 373  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 374  *      listening socket has a receive queue of sockets to accept.
 375  */
 376 
  377 static int
  378 tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Returns 1 when the condition selected by sel_type (SEL_IN, SEL_OUT
      * or SEL_EX) already holds for sk, 0 otherwise (including for any
      * other sel_type).  Each case first registers the caller with
      * select_wait(sk->sleep, wait).  sk->inuse is set on entry and every
      * return path calls release_sock(sk).
      */
  379 {
  380   DPRINTF((DBG_TCP, "tcp_select(sk=%X, sel_type = %d, wait = %X)\n",
  381                                                 sk, sel_type, wait));
  382 
  383   sk->inuse = 1;
  384   switch(sel_type) {
  385         case SEL_IN:
  386                 if(sk->debug)
  387                         printk("select in");
  388                 select_wait(sk->sleep, wait);
  389                 if(sk->debug)
  390                         printk("-select out");
                      /* Ready when something is queued and the socket is either
                         listening or tcp_readable() reports a non-zero count. */
  391                 if (skb_peek(&sk->receive_queue) != NULL) {
  392                         if (sk->state == TCP_LISTEN || tcp_readable(sk)) {
  393                                 release_sock(sk);
  394                                 if(sk->debug)
  395                                         printk("-select ok data\n");
  396                                 return(1);
  397                         }
  398                 }
  399                 if (sk->err != 0)       /* Receiver error */
  400                 {
  401                         release_sock(sk);
  402                         if(sk->debug)
  403                                 printk("-select ok error");
  404                         return(1);
  405                 }
                      /* A receive-side shutdown also counts as ready. */
  406                 if (sk->shutdown & RCV_SHUTDOWN) {
  407                         release_sock(sk);
  408                         if(sk->debug)
  409                                 printk("-select ok down\n");
  410                         return(1);
  411                 } else {
  412                         release_sock(sk);
  413                         if(sk->debug)
  414                                 printk("-select fail\n");
  415                         return(0);
  416                 }
  417         case SEL_OUT:
  418                 select_wait(sk->sleep, wait);
  419                 if (sk->shutdown & SEND_SHUTDOWN) {
  420                         DPRINTF((DBG_TCP,
  421                                 "write select on shutdown socket.\n"));
  422 
  423                         /* FIXME: should this return an error? */
  424                         release_sock(sk);
  425                         return(0);
  426                 }
  427 
  428                 /*
  429                  * FIXME:
  430                  * Hack so it will probably be able to write
  431                  * something if it says it's ok to write.
  432                  */
  433                 if (sk->prot->wspace(sk) >= sk->mss) {
  434                         release_sock(sk);
  435                         /* This should cause connect to work ok. */
  436                         if (sk->state == TCP_SYN_RECV ||
  437                             sk->state == TCP_SYN_SENT) return(0);
  438                         return(1);
  439                 }
  440                 DPRINTF((DBG_TCP,
  441                         "tcp_select: sleeping on write sk->wmem_alloc = %d, "
  442                         "sk->packets_out = %d\n"
  443                         "sk->write_seq = %u, sk->window_seq=%u\n", 
  444                                 sk->wmem_alloc, sk->packets_out,
  445                                 sk->write_seq, sk->window_seq));
  446 
  447                 release_sock(sk);
  448                 return(0);
  449         case SEL_EX:
  450                 select_wait(sk->sleep,wait);
                      /* Exceptional condition: a pending error or urgent data. */
  451                 if (sk->err || sk->urg_data) {
  452                         release_sock(sk);
  453                         return(1);
  454                 }
  455                 release_sock(sk);
  456                 return(0);
  457   }
  458 
        /* Unknown sel_type: nothing is ready. */
  459   release_sock(sk);
  460   return(0);
  461 }
 462 
 463 
 464 int
 465 tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 466 {
 467   int err;
 468   DPRINTF((DBG_TCP, "tcp_ioctl(sk=%X, cmd = %d, arg=%X)\n", sk, cmd, arg));
 469   switch(cmd) {
 470         case DDIOCSDBG:
 471                 return(dbg_ioctl((void *) arg, DBG_TCP));
 472 
 473         case TIOCINQ:
 474 #ifdef FIXME    /* FIXME: */
 475         case FIONREAD:
 476 #endif
 477                 {
 478                         unsigned long amount;
 479 
 480                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 481 
 482                         sk->inuse = 1;
 483                         amount = tcp_readable(sk);
 484                         release_sock(sk);
 485                         DPRINTF((DBG_TCP, "returning %d\n", amount));
 486                         err=verify_area(VERIFY_WRITE,(void *)arg,
 487                                                    sizeof(unsigned long));
 488                         if(err)
 489                                 return err;
 490                         put_fs_long(amount,(unsigned long *)arg);
 491                         return(0);
 492                 }
 493         case SIOCATMARK:
 494                 {
 495                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 496 
 497                         err = verify_area(VERIFY_WRITE,(void *) arg,
 498                                                   sizeof(unsigned long));
 499                         if (err)
 500                                 return err;
 501                         put_fs_long(answ,(int *) arg);
 502                         return(0);
 503                 }
 504         case TIOCOUTQ:
 505                 {
 506                         unsigned long amount;
 507 
 508                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 509                         amount = sk->prot->wspace(sk);
 510                         err=verify_area(VERIFY_WRITE,(void *)arg,
 511                                                    sizeof(unsigned long));
 512                         if(err)
 513                                 return err;
 514                         put_fs_long(amount,(unsigned long *)arg);
 515                         return(0);
 516                 }
 517         default:
 518                 return(-EINVAL);
 519   }
 520 }
 521 
 522 
 523 /* This routine computes a TCP checksum. */
  524 unsigned short
  525 tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
  526           unsigned long saddr, unsigned long daddr)
  527 {     
        /*
         * Computes the 16-bit ones-complement checksum over len bytes
         * starting at th, plus the address/protocol/length terms added
         * below.  A zero saddr is replaced by ip_my_addr().  The result is
         * the complement of the folded sum, masked to 16 bits.
         * i386 inline assembly; the running sum lives in %ebx.
         */
  528   unsigned long sum;
  529    
  530   if (saddr == 0) saddr = ip_my_addr();
  531   print_th(th);
        /* Seed: daddr + saddr + ((ntohs(len) << 16) + IPPROTO_TCP*256),
           with carries folded back in by adcl. */
  532   __asm__("\t addl %%ecx,%%ebx\n"
  533           "\t adcl %%edx,%%ebx\n"
  534           "\t adcl $0, %%ebx\n"
  535           : "=b"(sum)
  536           : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
  537           : "cx","bx","dx" );
  538    
        /* Add len/4 32-bit words; lodsl advances th (%esi) past them.
           Skipped when there is no complete 32-bit word. */
  539   if (len > 3) {
  540         __asm__("\tclc\n"
  541                 "1:\n"
  542                 "\t lodsl\n"
  543                 "\t adcl %%eax, %%ebx\n"
  544                 "\t loop 1b\n"
  545                 "\t adcl $0, %%ebx\n"
  546                 : "=b"(sum) , "=S"(th)
  547                 : "0"(sum), "c"(len/4) ,"1"(th)
  548                 : "ax", "cx", "bx", "si" );
  549   }
  550    
  551   /* Convert from 32 bits to 16 bits. */
  552   __asm__("\t movl %%ebx, %%ecx\n"
  553           "\t shrl $16,%%ecx\n"
  554           "\t addw %%cx, %%bx\n"
  555           "\t adcw $0, %%bx\n"
  556           : "=b"(sum)
  557           : "0"(sum)
  558           : "bx", "cx");
  559    
  560   /* Check for an extra word. */
        /* (len & 2): one trailing 16-bit word; th is advanced past it. */
  561   if ((len & 2) != 0) {
  562         __asm__("\t lodsw\n"
  563                 "\t addw %%ax,%%bx\n"
  564                 "\t adcw $0, %%bx\n"
  565                 : "=b"(sum), "=S"(th)
  566                 : "0"(sum) ,"1"(th)
  567                 : "si", "ax", "bx");
  568   }
  569    
  570   /* Now check for the extra byte. */
        /* (len & 1): the last byte is added with %ah cleared to zero. */
  571   if ((len & 1) != 0) {
  572         __asm__("\t lodsb\n"
  573                 "\t movb $0,%%ah\n"
  574                 "\t addw %%ax,%%bx\n"
  575                 "\t adcw $0, %%bx\n"
  576                 : "=b"(sum)
  577                 : "0"(sum) ,"S"(th)
  578                 : "si", "ax", "bx");
  579   }
  580    
  581   /* We only want the bottom 16 bits, but we never cleared the top 16. */
  582   return((~sum) & 0xffff);
  583 }
 584 
 585 
 586 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 587                 unsigned long daddr, int len, struct sock *sk)
 588 {
 589         th->check = 0;
 590         th->check = tcp_check(th, len, saddr, daddr);
 591         return;
 592 }
 593 
  594 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Checksums a fully built TCP segment and either transmits it at
      * once through sk->prot->queue_xmit() or appends it to
      * sk->write_queue.  Malformed buffers are freed with kfree_skb()
      * and dropped.
      */
  595 {
  596         int size;
  597         struct tcphdr * th = skb->h.th;
  598 
  599         /* length of packet (not counting length of pre-tcp headers) */
  600         size = skb->len - ((unsigned char *) th - skb->data);
  601 
  602         /* sanity check it.. */
  603         if (size < sizeof(struct tcphdr) || size > skb->len) {
  604                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
  605                         skb, skb->data, th, skb->len);
  606                 kfree_skb(skb, FREE_WRITE);
  607                 return;
  608         }
  609 
  610         /* If we have queued a header size packet.. */
  611         if (size == sizeof(struct tcphdr)) {
  612                 /* If its got a syn or fin its notionally included in the size..*/
  613                 if(!th->syn && !th->fin) {
  614                         printk("tcp_send_skb: attempt to queue a bogon.\n");
  615                         kfree_skb(skb,FREE_WRITE);
  616                         return;
  617                 }
  618         }
  619 
              /* The segment is counted and checksummed before the queue-or-send decision. */
  620         tcp_statistics.TcpOutSegs++;  
  621         /* We need to complete and send the packet. */
  622         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
  623 
              /* Sequence number just past this segment's payload
                 (seq + size - header length, the header being 4*doff bytes). */
  624         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
              /* Queue instead of sending when the segment ends beyond
                 window_seq, when retransmits are outstanding under a
                 TIME_WRITE timeout, or when packets_out has reached cong_window. */
  625         if (after(skb->h.seq, sk->window_seq) ||
  626             (sk->retransmits && sk->timeout == TIME_WRITE) ||
  627              sk->packets_out >= sk->cong_window) {
  628                 DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
  629                                         sk->cong_window, sk->packets_out));
  630                 DPRINTF((DBG_TCP, "sk->write_seq = %d, sk->window_seq = %d\n",
  631                                         sk->write_seq, sk->window_seq));
                      /* NOTE(review): the message below names tcp_send_partial
                         although it is printed from tcp_send_skb. */
  632                 if (skb->next != NULL) {
  633                         printk("tcp_send_partial: next != NULL\n");
  634                         skb_unlink(skb);
  635                 }
  636                 skb_queue_tail(&sk->write_queue, skb);
                      /* Arm the TIME_PROBE0 timer when the head of the write
                         queue is beyond window_seq, send_head is empty and no
                         acks are backlogged. */
  637                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
  638                     sk->send_head == NULL &&
  639                     sk->ack_backlog == 0)
  640                   reset_timer(sk, TIME_PROBE0, sk->rto);
  641         } else {
  642                 sk->sent_seq = sk->write_seq;
  643                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
  644         }
  645 }
 646 
 647 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 648 {
 649         struct sk_buff * skb;
 650         unsigned long flags;
 651 
 652         save_flags(flags);
 653         cli();
 654         skb = sk->partial;
 655         if (skb) {
 656                 sk->partial = NULL;
 657                 del_timer(&sk->partial_timer);
 658         }
 659         restore_flags(flags);
 660         return skb;
 661 }
 662 
 663 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 664 {
 665         struct sk_buff *skb;
 666 
 667         if (sk == NULL)
 668                 return;
 669         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 670                 tcp_send_skb(sk, skb);
 671 }
 672 
 673 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 674 {
 675         struct sk_buff * tmp;
 676         unsigned long flags;
 677 
 678         save_flags(flags);
 679         cli();
 680         tmp = sk->partial;
 681         if (tmp)
 682                 del_timer(&sk->partial_timer);
 683         sk->partial = skb;
 684         sk->partial_timer.expires = HZ;
 685         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 686         sk->partial_timer.data = (unsigned long) sk;
 687         add_timer(&sk->partial_timer);
 688         restore_flags(flags);
 689         if (tmp)
 690                 tcp_send_skb(sk, tmp);
 691 }
 692 
 693 
 694 /* This routine sends an ack and also updates the window. */
 695 static void
 696 tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 697              struct sock *sk,
 698              struct tcphdr *th, unsigned long daddr)
 699 {
 700         struct sk_buff *buff;
 701         struct tcphdr *t1;
 702         struct device *dev = NULL;
 703         int tmp;
 704 
 705         if(sk->zapped)
 706                 return;         /* We have been reset, we may not send again */
 707         /*
 708          * We need to grab some memory, and put together an ack,
 709          * and then put it into the queue to be sent.
 710          */
 711 
 712         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 713         if (buff == NULL) 
 714         {
 715                 /* Force it to send an ack. */
 716                 sk->ack_backlog++;
 717                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 718                 {
 719                         reset_timer(sk, TIME_WRITE, 10);
 720                 }
 721                 if (inet_debug == DBG_SLIP)
 722                         printk("\rtcp_ack: malloc failed\n");
 723                 return;
 724         }
 725 
 726         buff->len = sizeof(struct tcphdr);
 727         buff->sk = sk;
 728         buff->localroute = sk->localroute;
 729         t1 =(struct tcphdr *) buff->data;
 730 
 731         /* Put in the IP header and routing stuff. */
 732         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 733                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 734         if (tmp < 0) 
 735         {
 736                 buff->free=1;
 737                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 738                 if (inet_debug == DBG_SLIP)
 739                         printk("\rtcp_ack: build_header failed\n");
 740                 return;
 741         }
 742         buff->len += tmp;
 743         t1 =(struct tcphdr *)((char *)t1 +tmp);
 744 
 745         /* FIXME: */
 746         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 747 
 748         /*
 749          *      Swap the send and the receive. 
 750          */
 751         t1->dest = th->source;
 752         t1->source = th->dest;
 753         t1->seq = ntohl(sequence);
 754         t1->ack = 1;
 755         sk->window = tcp_select_window(sk);
 756         t1->window = ntohs(sk->window);
 757         t1->res1 = 0;
 758         t1->res2 = 0;
 759         t1->rst = 0;
 760         t1->urg = 0;
 761         t1->syn = 0;
 762         t1->psh = 0;
 763         t1->fin = 0;
 764         if (ack == sk->acked_seq) 
 765         {
 766                 sk->ack_backlog = 0;
 767                 sk->bytes_rcv = 0;
 768                 sk->ack_timed = 0;
 769                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 770                                   && sk->timeout == TIME_WRITE) 
 771                 {
 772                         if(sk->keepopen)
 773                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 774                         else
 775                                 delete_timer(sk);
 776                 }
 777         }
 778         t1->ack_seq = ntohl(ack);
 779         t1->doff = sizeof(*t1)/4;
 780         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 781         if (sk->debug)
 782                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 783         tcp_statistics.TcpOutSegs++;
 784         sk->prot->queue_xmit(sk, dev, buff, 1);
 785 }
 786 
 787 
 788 /* This routine builds a generic TCP header. */
 789 static int
 790 tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 791 {
 792 
 793   /* FIXME: want to get rid of this. */
 794   memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 795   th->seq = htonl(sk->write_seq);
 796   th->psh =(push == 0) ? 1 : 0;
 797   th->doff = sizeof(*th)/4;
 798   th->ack = 1;
 799   th->fin = 0;
 800   sk->ack_backlog = 0;
 801   sk->bytes_rcv = 0;
 802   sk->ack_timed = 0;
 803   th->ack_seq = htonl(sk->acked_seq);
 804   sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 805   th->window = htons(sk->window);
 806 
 807   return(sizeof(*th));
 808 }
 809 
 810 /*
 811  * This routine copies from a user buffer into a socket,
 812  * and starts the transmit system.
      *
      *  sk       - socket to write on
      *  from     - source buffer; it is read with memcpy_fromfs()
      *  len      - number of bytes the caller wants written
      *  nonblock - when non-zero, return -EAGAIN instead of sleeping
      *  flags    - MSG_OOB sends the data as urgent (URG set, urg_ptr
      *             filled in, never merged into a partial segment);
      *             MSG_DONTROUTE is OR-ed into skb->localroute
      *
      * Returns the number of bytes copied whenever at least one byte was
      * taken.  If nothing was copied the result is a negative errno:
      * -sk->err (sk->err is then cleared), -EPIPE, -EAGAIN, -ERESTARTSYS,
      * or the negative value returned by prot->build_header().
      *
      * The socket is marked in use (sk->inuse = 1) on entry and again
      * after every sleep; each return path either calls release_sock()
      * itself or leaves after one that was already done before sleeping.
 813  */
 814 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 815           int len, int nonblock, unsigned flags)
 816 {
 817         int copied = 0;         /* bytes taken from the user so far */
 818         int copy;               /* bytes to take in this iteration */
 819         int tmp;
 820         struct sk_buff *skb;
 821         struct sk_buff *send_tmp;       /* non-NULL: skb may be parked as partial */
 822         unsigned char *buff;
 823         struct proto *prot;
 824         struct device *dev = NULL;
 825 
 826         DPRINTF((DBG_TCP, "tcp_write(sk=%X, from=%X, len=%d, nonblock=%d, flags=%X)\n",
 827                                         sk, from, len, nonblock, flags));
 828 
 829         sk->inuse=1;
 830         prot = sk->prot;
 831         while(len > 0) 
 832         {
 833                 if (sk->err) 
 834                 {                       /* Stop on an error */
 835                         release_sock(sk);
 836                         if (copied) 
 837                                 return(copied);
 838                         tmp = -sk->err;
 839                         sk->err = 0;
 840                         return(tmp);
 841                 }
 842 
 843         /*
 844          *      First thing we do is make sure that we are established. 
 845          */
 846         
 847                 if (sk->shutdown & SEND_SHUTDOWN) 
 848                 {
     /*
      * NOTE(review): sk->err is written after release_sock() here.  On a
      * short write EPIPE is left in sk->err (presumably for the next call
      * to report); otherwise it is cleared again and -EPIPE returned.
      */
 849                         release_sock(sk);
 850                         sk->err = EPIPE;
 851                         if (copied) 
 852                                 return(copied);
 853                         sk->err = 0;
 854                         return(-EPIPE);
 855                 }
 856 
 857 
 858         /* 
 859          *      Wait for a connection to finish.
 860          */
 861         
 862                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 863                 {
 864                         if (sk->err) 
 865                         {
 866                                 release_sock(sk);
 867                                 if (copied) 
 868                                         return(copied);
 869                                 tmp = -sk->err;
 870                                 sk->err = 0;
 871                                 return(tmp);
 872                         }
 873 
     /* Only a handshake in progress is worth waiting for. */
 874                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 875                         {
 876                                 release_sock(sk);
 877                                 DPRINTF((DBG_TCP, "tcp_write: return 1\n"));
 878                                 if (copied) 
 879                                         return(copied);
 880 
 881                                 if (sk->err) 
 882                                 {
 883                                         tmp = -sk->err;
 884                                         sk->err = 0;
 885                                         return(tmp);
 886                                 }
 887 
     /* NOTE(review): SIGPIPE is raised only when sk->keepopen is set - confirm this is intended. */
 888                                 if (sk->keepopen) 
 889                                 {
 890                                         send_sig(SIGPIPE, current, 0);
 891                                 }
 892                                 return(-EPIPE);
 893                         }
 894 
 895                         if (nonblock || copied) 
 896                         {
 897                                 release_sock(sk);
 898                                 DPRINTF((DBG_TCP, "tcp_write: return 2\n"));
 899                                 if (copied) 
 900                                         return(copied);
 901                                 return(-EAGAIN);
 902                         }
 903 
     /*
      * Release the socket, then re-test the state with interrupts off
      * before sleeping on sk->sleep.
      */
 904                         release_sock(sk);
 905                         cli();
 906                 
 907                         if (sk->state != TCP_ESTABLISHED &&
 908                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 909                         {
 910                                 interruptible_sleep_on(sk->sleep);
 911                                 if (current->signal & ~current->blocked) 
 912                                 {
     /* release_sock() was already called above and sk->inuse has not been set again. */
 913                                         sti();
 914                                         DPRINTF((DBG_TCP, "tcp_write: return 3\n"));
 915                                         if (copied) 
 916                                                 return(copied);
 917                                         return(-ERESTARTSYS);
 918                                 }
 919                         }
 920                         sk->inuse = 1;
 921                         sti();
 922                 }
 923 
 924         /*
 925          * The following code can result in copy <= 0 if sk->mss is ever
 926          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 927          * sk->mtu is constant once SYN processing is finished.  I.e. we
 928          * had better not get here until we've seen his SYN and at least one
 929          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 930          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 931          * non-decreasing.  Note that any ioctl to set user_mss must be done
 932          * before the exchange of SYN's.  If the initial ack from the other
 933          * end has a window of 0, max_window and thus mss will both be 0.
 934          */
 935 
 936         /* 
 937          *      Now we need to check if we have a half built packet. 
 938          */
 939 
 940                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
 941                 {
 942                         int hdrlen;
 943 
 944                          /* IP header + TCP header */
 945                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 946                                  + sizeof(struct tcphdr);
 947         
 948                         /* Add more stuff to the end of skb->len */
 949                         if (!(flags & MSG_OOB)) 
 950                         {
     /* Room left up to one mss of payload, capped by what the caller still has. */
 951                                 copy = min(sk->mss - (skb->len - hdrlen), len);
 952                                 /* FIXME: this is really a bug. */
 953                                 if (copy <= 0) 
 954                                 {
 955                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
 956                                         copy = 0;
 957                                 }
 958           
 959                                 memcpy_fromfs(skb->data + skb->len, from, copy);
 960                                 skb->len += copy;
 961                                 from += copy;
 962                                 copied += copy;
 963                                 len -= copy;
 964                                 sk->write_seq += copy;
 965                         }
     /*
      * Send now if the segment is full, the write is urgent, or nothing
      * is in flight (sk->packets_out == 0); otherwise park it again.
      */
 966                         if ((skb->len - hdrlen) >= sk->mss ||
 967                                 (flags & MSG_OOB) || !sk->packets_out)
 968                                 tcp_send_skb(sk, skb);
 969                         else
 970                                 tcp_enqueue_partial(skb, sk);
 971                         continue;
 972                 }
 973 
 974         /*
 975          * We also need to worry about the window.
 976          * If window < 1/2 the maximum window we've seen from this
 977          *   host, don't use it.  This is sender side
 978          *   silly window prevention, as specified in RFC1122.
 979          *   (Note that this is different than earlier versions of
 980          *   SWS prevention, e.g. RFC813.).  What we actually do is 
 981          *   use the whole MSS.  Since this results in the right
 982          *   edge of the packet being outside the window, it will
 983          *   be queued for later rather than sent.
 984          */
 985 
 986                 copy = sk->window_seq - sk->write_seq;
 987                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
 988                         copy = sk->mss;
 989                 if (copy > len)
 990                         copy = len;
 991 
 992         /*
 993          *      We should really check the window here also. 
 994          */
 995          
 996                 send_tmp = NULL;
     /*
      * A short, non-urgent segment gets a buffer sized from sk->mtu so
      * later writes can append to it; send_tmp records that it is a
      * candidate for parking as the partial segment.
      */
 997                 if (copy < sk->mss && !(flags & MSG_OOB)) 
 998                 {
 999                         /*
1000                          *      We will release the socket in case we sleep here. 
1001                          */
1002                         release_sock(sk);
1003                         /*
1004                          *      NB: following must be mtu, because mss can be increased.
1005                          *      mss is always <= mtu 
1006                          */
1007                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1008                         sk->inuse = 1;
1009                         send_tmp = skb;
1010                 } 
1011                 else 
1012                 {
1013                         /*
1014                          *      We will release the socket in case we sleep here. 
1015                          */
1016                         release_sock(sk);
1017                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1018                         sk->inuse = 1;
1019                 }
1020 
1021                 /*
1022                  *      If we didn't get any memory, we need to sleep. 
1023                  */
1024 
1025                 if (skb == NULL) 
1026                 {
1027                         if (nonblock /* || copied */) 
1028                         {
1029                                 release_sock(sk);
1030                                 DPRINTF((DBG_TCP, "tcp_write: return 4\n"));
1031                                 if (copied) 
1032                                         return(copied);
1033                                 return(-EAGAIN);
1034                         }
1035 
1036                         /*
1037                          *      FIXME: here is another race condition. 
1038                          */
1039 
     /*
      * Snapshot wmem_alloc before releasing the socket; sleep only if it
      * has not dropped below the snapshot by the time interrupts are off.
      */
1040                         tmp = sk->wmem_alloc;
1041                         release_sock(sk);
1042                         cli();
1043                         /*
1044                          *      Again we will try to avoid it. 
1045                          */
1046                         if (tmp <= sk->wmem_alloc &&
1047                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1048                                 && sk->err == 0) 
1049                         {
1050                                 interruptible_sleep_on(sk->sleep);
1051                                 if (current->signal & ~current->blocked) 
1052                                 {
     /* release_sock() was already called above and sk->inuse has not been set again. */
1053                                         sti();
1054                                         DPRINTF((DBG_TCP, "tcp_write: return 5\n"));
1055                                         if (copied) 
1056                                                 return(copied);
1057                                         return(-ERESTARTSYS);
1058                                 }
1059                         }
1060                         sk->inuse = 1;
1061                         sti();
1062                         continue;
1063                 }
1064 
1065                 skb->len = 0;
1066                 skb->sk = sk;
1067                 skb->free = 0;
1068                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1069         
1070                 buff = skb->data;
1071         
1072                 /*
1073                  * FIXME: we need to optimize this.
1074                  * Perhaps some hints here would be good.
1075                  */
1076                 
1077                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1078                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1079                 if (tmp < 0 ) 
1080                 {
1081                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1082                         release_sock(sk);
1083                         DPRINTF((DBG_TCP, "tcp_write: return 6\n"));
1084                         if (copied) 
1085                                 return(copied);
1086                         return(tmp);
1087                 }
     /* tmp is the length build_header() added; the TCP header goes right after it. */
1088                 skb->len += tmp;
1089                 skb->dev = dev;
1090                 buff += tmp;
1091                 skb->h.th =(struct tcphdr *) buff;
     /*
      * The third argument is 0 exactly when this segment takes the last
      * of the caller's data, which makes tcp_build_header() set PSH.
      */
1092                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
     /*
      * NOTE(review): tcp_build_header() as defined in this file always
      * returns sizeof(struct tcphdr), so this branch is not taken.
      */
1093                 if (tmp < 0) 
1094                 {
1095                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1096                         release_sock(sk);
1097                         DPRINTF((DBG_TCP, "tcp_write: return 7\n"));
1098                         if (copied) 
1099                                 return(copied);
1100                         return(tmp);
1101                 }
1102 
1103                 if (flags & MSG_OOB) 
1104                 {
     /* NOTE(review): this stores a host value into the header; htons() would state the direction correctly. */
1105                         ((struct tcphdr *)buff)->urg = 1;
1106                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1107                 }
1108                 skb->len += tmp;
1109                 memcpy_fromfs(buff+tmp, from, copy);
1110 
1111                 from += copy;
1112                 copied += copy;
1113                 len -= copy;
1114                 skb->len += copy;
1115                 skb->free = 0;
1116                 sk->write_seq += copy;
1117         
     /* A short segment is parked only while other packets are still in flight. */
1118                 if (send_tmp != NULL && sk->packets_out) 
1119                 {
1120                         tcp_enqueue_partial(send_tmp, sk);
1121                         continue;
1122                 }
1123                 tcp_send_skb(sk, skb);
1124         }
     /* Reached when the loop ends with len <= 0; any pending error is cleared. */
1125         sk->err = 0;
1126 
1127 /*
1128  *      Nagles rule. Turn Nagle off with TCP_NODELAY for highly
1129  *      interactive fast network servers. It's meant to be on and
1130  *      it really improves the throughput though not the echo time
1131  *      on my slow slip link - Alan
1132  */
1133 
1134 /*
1135  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1136  */
1137  
     /*
      * Flush a parked segment right away when nothing is in flight, or
      * when sk->nonagle is set and write_seq is still before window_seq.
      */
1138         if(sk->partial && ((!sk->packets_out) 
1139      /* If not nagling we can send on the before case too.. */
1140               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1141         ))
1142                 tcp_send_partial(sk);
1143 
1144         release_sock(sk);
1145         DPRINTF((DBG_TCP, "tcp_write: return 8\n"));
1146         return(copied);
1147 }
1148 
1149 
1150 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1151            int len, int nonblock, unsigned flags,
1152            struct sockaddr_in *addr, int addr_len)
1153 {
1154         struct sockaddr_in sin;
1155 
1156         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1157                 return -EINVAL;
1158         if (addr_len < sizeof(sin)) 
1159                 return(-EINVAL);
1160         memcpy_fromfs(&sin, addr, sizeof(sin));
1161         if (sin.sin_family && sin.sin_family != AF_INET) 
1162                 return(-EINVAL);
1163         if (sin.sin_port != sk->dummy_th.dest) 
1164                 return(-EINVAL);
1165         if (sin.sin_addr.s_addr != sk->daddr) 
1166                 return(-EINVAL);
1167         return(tcp_write(sk, from, len, nonblock, flags));
1168 }
1169 
1170 
1171 static void
1172 tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1173 {
1174         int tmp;
1175         struct device *dev = NULL;
1176         struct tcphdr *t1;
1177         struct sk_buff *buff;
1178 
1179         DPRINTF((DBG_TCP, "in tcp read wakeup\n"));
1180         if (!sk->ack_backlog) 
1181                 return;
1182 
1183         /*
1184          * FIXME: we need to put code here to prevent this routine from
1185          * being called.  Being called once in a while is ok, so only check
1186          * if this is the second time in a row.
1187          */
1188 
1189         /*
1190          * We need to grab some memory, and put together an ack,
1191          * and then put it into the queue to be sent.
1192          */
1193 
1194         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1195         if (buff == NULL) 
1196         {
1197                 /* Try again real soon. */
1198                 reset_timer(sk, TIME_WRITE, 10);
1199                 return;
1200         }
1201 
1202         buff->len = sizeof(struct tcphdr);
1203         buff->sk = sk;
1204         buff->localroute = sk->localroute;
1205         
1206         /*
1207          *      Put in the IP header and routing stuff. 
1208          */
1209 
1210         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1211                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1212         if (tmp < 0) 
1213         {
1214                 buff->free=1;
1215                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1216                 return;
1217         }
1218 
1219         buff->len += tmp;
1220         t1 =(struct tcphdr *)(buff->data +tmp);
1221 
1222         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1223         t1->seq = htonl(sk->sent_seq);
1224         t1->ack = 1;
1225         t1->res1 = 0;
1226         t1->res2 = 0;
1227         t1->rst = 0;
1228         t1->urg = 0;
1229         t1->syn = 0;
1230         t1->psh = 0;
1231         sk->ack_backlog = 0;
1232         sk->bytes_rcv = 0;
1233         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1234         t1->window = ntohs(sk->window);
1235         t1->ack_seq = ntohl(sk->acked_seq);
1236         t1->doff = sizeof(*t1)/4;
1237         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1238         sk->prot->queue_xmit(sk, dev, buff, 1);
1239         tcp_statistics.TcpOutSegs++;
1240 }
1241 
1242 
1243 /*
1244  *      FIXME:
1245  *      This routine frees used buffers.
1246  *      It should consider sending an ACK to let the
1247  *      other end know we now have a bigger window.
1248  */
1249 
1250 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1251 {
1252         unsigned long flags;
1253         int left;
1254         struct sk_buff *skb;
1255 
1256         if(sk->debug)
1257                 printk("cleaning rbuf for sk=%p\n", sk);
1258   
1259         save_flags(flags);
1260         cli();
1261   
1262         left = sk->prot->rspace(sk);
1263  
1264         /*
1265          * We have to loop through all the buffer headers,
1266          * and try to free up all the space we can.
1267          */
1268 
1269         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1270         {
1271                 if (!skb->used) 
1272                         break;
1273                 skb_unlink(skb);
1274                 skb->sk = sk;
1275                 kfree_skb(skb, FREE_READ);
1276         }
1277 
1278         restore_flags(flags);
1279 
1280         /*
1281          * FIXME:
1282          * At this point we should send an ack if the difference
1283          * in the window, and the amount of space is bigger than
1284          * TCP_WINDOW_DIFF.
1285          */
1286 
1287         DPRINTF((DBG_TCP, "sk->window left = %d, sk->prot->rspace(sk)=%d\n",
1288                         sk->window - sk->bytes_rcv, sk->prot->rspace(sk)));
1289 
1290         if(sk->debug)
1291                 printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk),
1292                                             left);
1293         if (sk->prot->rspace(sk) != left) 
1294         {
1295                 /*
1296                  * This area has caused the most trouble.  The current strategy
1297                  * is to simply do nothing if the other end has room to send at
1298                  * least 3 full packets, because the ack from those will auto-
1299                  * matically update the window.  If the other end doesn't think
1300                  * we have much space left, but we have room for atleast 1 more
1301                  * complete packet than it thinks we do, we will send an ack
1302                  * immediatedly.  Otherwise we will wait up to .5 seconds in case
1303                  * the user reads some more.
1304                  */
1305                 sk->ack_backlog++;
1306         /*
1307          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1308          * if the other end is offering a window smaller than the agreed on MSS
1309          * (called sk->mtu here).  In theory there's no connection between send
1310          * and receive, and so no reason to think that they're going to send
1311          * small packets.  For the moment I'm using the hack of reducing the mss
1312          * only on the send side, so I'm putting mtu here.
1313          */
1314 
1315                 if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) 
1316                 {
1317                         /* Send an ack right now. */
1318                         tcp_read_wakeup(sk);
1319                 } 
1320                 else 
1321                 {
1322                         /* Force it to send an ack soon. */
1323                         int was_active = del_timer(&sk->timer);
1324                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1325                         {
1326                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1327                         } 
1328                         else
1329                                 add_timer(&sk->timer);
1330                 }
1331         }
1332 } 
1333 
1334 
1335 /* Handle reading urgent data. */
1336 static int
1337 tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1338              unsigned char *to, int len, unsigned flags)
1339 {
1340         struct wait_queue wait = { current, NULL };
1341 
1342         while (len > 0) {
1343                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1344                         return -EINVAL;
1345                 if (sk->urg_data & URG_VALID) {
1346                         char c = sk->urg_data;
1347                         if (!(flags & MSG_PEEK))
1348                                 sk->urg_data = URG_READ;
1349                         put_fs_byte(c, to);
1350                         return 1;
1351                 }
1352 
1353                 if (sk->err) {
1354                         int tmp = -sk->err;
1355                         sk->err = 0;
1356                         return tmp;
1357                 }
1358 
1359                 if (sk->state == TCP_CLOSE || sk->done) {
1360                         if (!sk->done) {
1361                                 sk->done = 1;
1362                                 return 0;
1363                         }
1364                         return -ENOTCONN;
1365                 }
1366 
1367                 if (sk->shutdown & RCV_SHUTDOWN) {
1368                         sk->done = 1;
1369                         return 0;
1370                 }
1371 
1372                 if (nonblock)
1373                         return -EAGAIN;
1374 
1375                 if (current->signal & ~current->blocked)
1376                         return -ERESTARTSYS;
1377 
1378                 current->state = TASK_INTERRUPTIBLE;
1379                 add_wait_queue(sk->sleep, &wait);
1380                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1381                     !(sk->shutdown & RCV_SHUTDOWN))
1382                         schedule();
1383                 remove_wait_queue(sk->sleep, &wait);
1384                 current->state = TASK_RUNNING;
1385         }
1386         return 0;
1387 }
1388 
1389 
 1390 /*
      * This routine copies from a sock struct into the user buffer.
      *
      *  sk       - socket to read from
      *  to       - destination buffer; checked with verify_area() and
      *             written with memcpy_tofs()
      *  len      - size of the destination buffer
      *  nonblock - when non-zero, return -EAGAIN instead of sleeping
      *  flags    - MSG_OOB hands the request to tcp_read_urg();
      *             MSG_PEEK reads without advancing sk->copied_seq and
      *             without marking buffers as used
      *
      * Returns the number of bytes copied, 0 when len is 0 or at end of
      * stream, or a negative errno: -EINVAL for a negative len, the
      * verify_area() error, -ENOTCONN for a listening socket or after end
      * of stream was already reported, -sk->err (then cleared), -EAGAIN,
      * or -ERESTARTSYS.
      */
 1391 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1392         int len, int nonblock, unsigned flags)
1393 {
1394         struct wait_queue wait = { current, NULL };
1395         int copied = 0;
1396         unsigned long peek_seq;
1397         unsigned long *seq;
1398         unsigned long used;
1399         int err;
1400 
1401         if (len == 0)
1402                 return 0;
1403 
1404         if (len < 0)
1405                 return -EINVAL;
1406 
1407         err = verify_area(VERIFY_WRITE, to, len);
1408         if (err)
1409                 return err;
1410 
1411         /* This error should be checked. */
1412         if (sk->state == TCP_LISTEN)
1413                 return -ENOTCONN;
1414 
1415         /* Urgent data needs to be handled specially. */
1416         if (flags & MSG_OOB)
1417                 return tcp_read_urg(sk, nonblock, to, len, flags);
1418 
     /*
      * seq points at the sequence counter this call advances: the
      * socket's own copied_seq normally, or a private copy when peeking
      * so that the socket state is left untouched.
      */
1419         peek_seq = sk->copied_seq;
1420         seq = &sk->copied_seq;
1421         if (flags & MSG_PEEK)
1422                 seq = &peek_seq;
1423 
     /* The task stays on sk->sleep for the whole loop; it is removed after the loop. */
1424         add_wait_queue(sk->sleep, &wait);
1425         sk->inuse = 1;
1426         while (len > 0) {
1427                 struct sk_buff * skb;
1428                 unsigned long offset;
1429         
1430                 /*
1431                  * are we at urgent data? Stop if we have read anything.
1432                  */
1433                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1434                         break;
1435 
     /*
      * Set before the queue is scanned - presumably so that a wakeup
      * arriving between the scan and schedule() is not lost.
      */
1436                 current->state = TASK_INTERRUPTIBLE;
1437 
     /*
      * Walk the receive queue looking for the buffer that holds the next
      * byte wanted (sequence 1+*seq).  A buffer starting beyond that
      * point ends the search; buffers lying entirely before it are
      * marked used (unless peeking) and skipped.
      */
1438                 skb = skb_peek(&sk->receive_queue);
1439                 do {
1440                         if (!skb)
1441                                 break;
1442                         if (before(1+*seq, skb->h.th->seq))
1443                                 break;
1444                         offset = 1 + *seq - skb->h.th->seq;
     /* A SYN occupies one sequence number but no data byte. */
1445                         if (skb->h.th->syn)
1446                                 offset--;
1447                         if (offset < skb->len)
1448                                 goto found_ok_skb;
1449                         if (!(flags & MSG_PEEK))
1450                                 skb->used = 1;
1451                         skb = skb->next;
1452                 } while (skb != (struct sk_buff *)&sk->receive_queue);
1453 
     /* No more data available right now: return what has been copied, if anything. */
1454                 if (copied)
1455                         break;
1456 
1457                 if (sk->err) {
1458                         copied = -sk->err;
1459                         sk->err = 0;
1460                         break;
1461                 }
1462 
     /* A closed socket yields 0 once (sk->done is set), then -ENOTCONN. */
1463                 if (sk->state == TCP_CLOSE) {
1464                         if (!sk->done) {
1465                                 sk->done = 1;
1466                                 break;
1467                         }
1468                         copied = -ENOTCONN;
1469                         break;
1470                 }
1471 
1472                 if (sk->shutdown & RCV_SHUTDOWN) {
1473                         sk->done = 1;
1474                         break;
1475                 }
1476                         
1477                 if (nonblock) {
1478                         copied = -EAGAIN;
1479                         break;
1480                 }
1481 
     /* Free consumed buffers and release the socket before sleeping. */
1482                 cleanup_rbuf(sk);
1483                 release_sock(sk);
1484                 schedule();
1485                 sk->inuse = 1;
1486 
1487                 if (current->signal & ~current->blocked) {
1488                         copied = -ERESTARTSYS;
1489                         break;
1490                 }
1491                 continue;
1492 
1493         found_ok_skb:
1494                 /* Ok so how much can we use ? */
1495                 used = skb->len - offset;
1496                 if (len < used)
1497                         used = len;
1498                 /* do we have urgent data here? */
1499                 if (sk->urg_data) {
     /*
      * urg_offset is the distance from the next byte to the urgent
      * byte.  If the urgent byte is the next one and is not delivered
      * inline, step over it; if it lies further on within this chunk,
      * stop just short of it.
      */
1500                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1501                         if (urg_offset < used) {
1502                                 if (!urg_offset) {
1503                                         if (!sk->urginline) {
1504                                                 ++*seq;
1505                                                 offset++;
1506                                                 used--;
1507                                         }
1508                                 } else
1509                                         used = urg_offset;
1510                         }
1511                 }
1512                 /* Copy it */
     /* Data starts doff*4 bytes after the start of the TCP header. */
1513                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1514                         skb->h.th->doff*4 + offset, used);
1515                 copied += used;
1516                 len -= used;
1517                 to += used;
1518                 *seq += used;
     /*
      * This tests sk->copied_seq rather than *seq, so a MSG_PEEK read
      * does not clear the urgent-data state.
      */
1519                 if (after(sk->copied_seq+1,sk->urg_seq))
1520                         sk->urg_data = 0;
     /* The buffer is exhausted once the copy reached its end. */
1521                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1522                         skb->used = 1;
1523         }
1524         remove_wait_queue(sk->sleep, &wait);
1525         current->state = TASK_RUNNING;
1526 
1527         /* Clean up data we have read: This will do ACK frames */
1528         cleanup_rbuf(sk);
1529         release_sock(sk);
1530         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1531         return copied;
1532 }
1533 
1534  
1535 /*
1536  * Shutdown the sending side of a connection.
1537  */
1538 
1539 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1540 {
1541         struct sk_buff *buff;
1542         struct tcphdr *t1, *th;
1543         struct proto *prot;
1544         int tmp;
1545         struct device *dev = NULL;
1546 
1547         /*
1548          * We need to grab some memory, and put together a FIN,
1549          * and then put it into the queue to be sent.
1550          * FIXME:
1551          *
1552          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1553          *      Most of this is guesswork, so maybe it will work...
1554          */
1555 
1556         if (!(how & SEND_SHUTDOWN)) 
1557                 return;
1558          
1559         /*
1560          *      If we've already sent a FIN, return. 
1561          */
1562          
1563         if (sk->state == TCP_FIN_WAIT1 ||
1564             sk->state == TCP_FIN_WAIT2 ||
1565             sk->state == TCP_CLOSING ||
1566             sk->state == TCP_LAST_ACK ||
1567             sk->state == TCP_TIME_WAIT
1568         ) {
1569                 return;
1570         }
1571         sk->inuse = 1;
1572 
1573         /*
1574          * flag that the sender has shutdown
1575          */
1576 
1577         sk->shutdown |= SEND_SHUTDOWN;
1578 
1579         /*
1580          *  Clear out any half completed packets. 
1581          */
1582 
1583         if (sk->partial)
1584                 tcp_send_partial(sk);
1585 
1586         prot =(struct proto *)sk->prot;
1587         th =(struct tcphdr *)&sk->dummy_th;
1588         release_sock(sk); /* incase the malloc sleeps. */
1589         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1590         if (buff == NULL)
1591                 return;
1592         sk->inuse = 1;
1593 
1594         DPRINTF((DBG_TCP, "tcp_shutdown_send buff = %X\n", buff));
1595         buff->sk = sk;
1596         buff->len = sizeof(*t1);
1597         buff->localroute = sk->localroute;
1598         t1 =(struct tcphdr *) buff->data;
1599 
1600         /*
1601          *      Put in the IP header and routing stuff. 
1602          */
1603 
1604         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1605                            IPPROTO_TCP, sk->opt,
1606                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1607         if (tmp < 0) 
1608         {
1609                 /*
1610                  *      Finish anyway, treat this as a send that got lost. 
1611                  *
1612                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1613                  *      written data to be completely acknowledged along
1614                  *      with an acknowledge to our FIN.
1615                  *
1616                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1617                  *      connection established.
1618                  */
1619                 buff->free=1;
1620                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1621 
1622                 if (sk->state == TCP_ESTABLISHED)
1623                         sk->state = TCP_FIN_WAIT1;
1624                 else if(sk->state == TCP_CLOSE_WAIT)
1625                         sk->state = TCP_LAST_ACK;
1626                 else
1627                         sk->state = TCP_FIN_WAIT2;
1628 
1629                 release_sock(sk);
1630                 DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
1631                 return;
1632         }
1633 
1634         t1 =(struct tcphdr *)((char *)t1 +tmp);
1635         buff->len += tmp;
1636         buff->dev = dev;
1637         memcpy(t1, th, sizeof(*t1));
1638         t1->seq = ntohl(sk->write_seq);
1639         sk->write_seq++;
1640         buff->h.seq = sk->write_seq;
1641         t1->ack = 1;
1642         t1->ack_seq = ntohl(sk->acked_seq);
1643         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1644         t1->fin = 1;
1645         t1->rst = 0;
1646         t1->doff = sizeof(*t1)/4;
1647         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1648 
1649         /*
1650          * Can't just queue this up.
1651          * It should go at the end of the write queue.
1652          */
1653         
1654         if (skb_peek(&sk->write_queue) != NULL) 
1655         {
1656                 buff->free=0;
1657                 if (buff->next != NULL) 
1658                 {
1659                         printk("tcp_shutdown: next != NULL\n");
1660                         skb_unlink(buff);
1661                 }
1662                 skb_queue_tail(&sk->write_queue, buff);
1663         } 
1664         else 
1665         {
1666                 sk->sent_seq = sk->write_seq;
1667                 sk->prot->queue_xmit(sk, dev, buff, 0);
1668         }
1669 
1670         if (sk->state == TCP_ESTABLISHED) 
1671                 sk->state = TCP_FIN_WAIT1;
1672         else if (sk->state == TCP_CLOSE_WAIT)
1673                 sk->state = TCP_LAST_ACK;
1674         else
1675                 sk->state = TCP_FIN_WAIT2;
1676 
1677         release_sock(sk);
1678 }
1679 
1680 
1681 static int
1682 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1683              int to_len, int nonblock, unsigned flags,
1684              struct sockaddr_in *addr, int *addr_len)
1685 {
1686   struct sockaddr_in sin;
1687   int len;
1688   int err;
1689   int result;
1690   
1691   /* Have to check these first unlike the old code. If 
1692      we check them after we lose data on an error
1693      which is wrong */
1694   err = verify_area(VERIFY_WRITE,addr_len,sizeof(long));
1695   if(err)
1696         return err;
1697   len = get_fs_long(addr_len);
1698   if(len > sizeof(sin))
1699         len = sizeof(sin);
1700   err=verify_area(VERIFY_WRITE, addr, len);  
1701   if(err)
1702         return err;
1703         
1704   result=tcp_read(sk, to, to_len, nonblock, flags);
1705 
1706   if (result < 0) return(result);
1707   
1708   sin.sin_family = AF_INET;
1709   sin.sin_port = sk->dummy_th.dest;
1710   sin.sin_addr.s_addr = sk->daddr;
1711 
1712   memcpy_tofs(addr, &sin, len);
1713   put_fs_long(len, addr_len);
1714   return(result);
1715 }
1716 
1717 
1718 /*
1719  *      This routine will send an RST to the other tcp. 
1720  */
1721  
1722 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1723           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1724 {
1725         struct sk_buff *buff;
1726         struct tcphdr *t1;
1727         int tmp;
1728         struct device *ndev=NULL;
1729   
1730 /*
1731  * We need to grab some memory, and put together an RST,
1732  * and then put it into the queue to be sent.
1733  */
1734 
1735         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1736         if (buff == NULL) 
1737                 return;
1738 
1739         DPRINTF((DBG_TCP, "tcp_reset buff = %X\n", buff));
1740         buff->len = sizeof(*t1);
1741         buff->sk = NULL;
1742         buff->dev = dev;
1743         buff->localroute = 0;
1744 
1745         t1 =(struct tcphdr *) buff->data;
1746 
1747         /*
1748          *      Put in the IP header and routing stuff. 
1749          */
1750 
1751         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1752                            sizeof(struct tcphdr),tos,ttl);
1753         if (tmp < 0) 
1754         {
1755                 buff->free = 1;
1756                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1757                 return;
1758         }
1759 
1760         t1 =(struct tcphdr *)((char *)t1 +tmp);
1761         buff->len += tmp;
1762         memcpy(t1, th, sizeof(*t1));
1763 
1764         /*
1765          *      Swap the send and the receive. 
1766          */
1767 
1768         t1->dest = th->source;
1769         t1->source = th->dest;
1770         t1->rst = 1;  
1771         t1->window = 0;
1772   
1773         if(th->ack)
1774         {
1775                 t1->ack = 0;
1776                 t1->seq = th->ack_seq;
1777                 t1->ack_seq = 0;
1778         }
1779         else
1780         {
1781                 t1->ack = 1;
1782                 if(!th->syn)
1783                         t1->ack_seq=htonl(th->seq);
1784                 else
1785                         t1->ack_seq=htonl(th->seq+1);
1786                 t1->seq=0;
1787         }
1788 
1789         t1->syn = 0;
1790         t1->urg = 0;
1791         t1->fin = 0;
1792         t1->psh = 0;
1793         t1->doff = sizeof(*t1)/4;
1794         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1795         prot->queue_xmit(NULL, dev, buff, 1);
1796         tcp_statistics.TcpOutSegs++;
1797 }
1798 
1799 
1800 /*
1801  *      Look for tcp options. Parses everything but only knows about MSS.
1802  *      This routine is always called with the packet containing the SYN.
1803  *      However it may also be called with the ack to the SYN.  So you
1804  *      can't assume this is always the SYN.  It's always called after
1805  *      we have set up sk->mtu to our own MTU.
1806  */
1807  
1808 static void
1809 tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1810 {
1811   unsigned char *ptr;
1812   int length=(th->doff*4)-sizeof(struct tcphdr);
1813   int mss_seen = 0;
1814     
1815   ptr = (unsigned char *)(th + 1);
1816   
1817   while(length>0)
1818   {
1819         int opcode=*ptr++;
1820         int opsize=*ptr++;
1821         switch(opcode)
1822         {
1823                 case TCPOPT_EOL:
1824                         return;
1825                 case TCPOPT_NOP:
1826                         length-=2;
1827                         continue;
1828                 
1829                 default:
1830                         if(opsize<=2)   /* Avoid silly options looping forever */
1831                                 return;
1832                         switch(opcode)
1833                         {
1834                                 case TCPOPT_MSS:
1835                                         if(opsize==4 && th->syn)
1836                                         {
1837                                                 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1838                                                 mss_seen = 1;
1839                                         }
1840                                         break;
1841                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1842                         }
1843                         ptr+=opsize-2;
1844                         length-=opsize;
1845         }
1846   }
1847   if (th->syn) {
1848     if (! mss_seen)
1849       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1850   }
1851   sk->mss = min(sk->max_window, sk->mtu);
1852 }
1853 
/*
 * Return the classful network mask for 'dst'.
 *
 * 'dst' is in network byte order and so is the result.  Class A and
 * class B addresses get their respective masks; every other address
 * is given the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_addr = ntohl(dst);
	unsigned long net_mask;

	if (IN_CLASSA(host_addr))
		net_mask = htonl(IN_CLASSA_NET);
	else if (IN_CLASSB(host_addr))
		net_mask = htonl(IN_CLASSB_NET);
	else
		net_mask = htonl(IN_CLASSC_NET);

	return net_mask;
}
1863 
1864 /*
1865  * This routine handles a connection request.
1866  * It should make sure we haven't already responded.
1867  * Because of the way BSD works, we have to send a syn/ack now.
1868  * This also means it will be harder to close a socket which is
1869  * listening.
1870  */
1871 static void
1872 tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1873                  unsigned long daddr, unsigned long saddr,
1874                  struct options *opt, struct device *dev)
1875 {
1876   struct sk_buff *buff;
1877   struct tcphdr *t1;
1878   unsigned char *ptr;
1879   struct sock *newsk;
1880   struct tcphdr *th;
1881   struct device *ndev=NULL;
1882   int tmp;
1883 
1884   DPRINTF((DBG_TCP, "tcp_conn_request(sk = %X, skb = %X, daddr = %X, sadd4= %X, \n"
1885           "                  opt = %X, dev = %X)\n",
1886           sk, skb, daddr, saddr, opt, dev));
1887   
1888   th = skb->h.th;
1889 
1890   /* If the socket is dead, don't accept the connection. */
1891   if (!sk->dead) {
1892         sk->data_ready(sk,0);
1893   } else {
1894         DPRINTF((DBG_TCP, "tcp_conn_request on dead socket\n"));
1895         tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1896         tcp_statistics.TcpAttemptFails++;
1897         kfree_skb(skb, FREE_READ);
1898         return;
1899   }
1900 
1901   /*
1902    * Make sure we can accept more.  This will prevent a
1903    * flurry of syns from eating up all our memory.
1904    */
1905   if (sk->ack_backlog >= sk->max_ack_backlog) {
1906         tcp_statistics.TcpAttemptFails++;
1907         kfree_skb(skb, FREE_READ);
1908         return;
1909   }
1910 
1911   /*
1912    * We need to build a new sock struct.
1913    * It is sort of bad to have a socket without an inode attached
1914    * to it, but the wake_up's will just wake up the listening socket,
1915    * and if the listening socket is destroyed before this is taken
1916    * off of the queue, this will take care of it.
1917    */
1918   newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1919   if (newsk == NULL) {
1920         /* just ignore the syn.  It will get retransmitted. */
1921         tcp_statistics.TcpAttemptFails++;
1922         kfree_skb(skb, FREE_READ);
1923         return;
1924   }
1925 
1926   DPRINTF((DBG_TCP, "newsk = %X\n", newsk));
1927   memcpy(newsk, sk, sizeof(*newsk));
1928   skb_queue_head_init(&newsk->write_queue);
1929   skb_queue_head_init(&newsk->receive_queue);
1930   newsk->send_head = NULL;
1931   newsk->send_tail = NULL;
1932   skb_queue_head_init(&newsk->back_log);
1933   newsk->rtt = TCP_CONNECT_TIME << 3;
1934   newsk->rto = TCP_CONNECT_TIME;
1935   newsk->mdev = 0;
1936   newsk->max_window = 0;
1937   newsk->cong_window = 1;
1938   newsk->cong_count = 0;
1939   newsk->ssthresh = 0;
1940   newsk->backoff = 0;
1941   newsk->blog = 0;
1942   newsk->intr = 0;
1943   newsk->proc = 0;
1944   newsk->done = 0;
1945   newsk->partial = NULL;
1946   newsk->pair = NULL;
1947   newsk->wmem_alloc = 0;
1948   newsk->rmem_alloc = 0;
1949   newsk->localroute = sk->localroute;
1950 
1951   newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1952 
1953   newsk->err = 0;
1954   newsk->shutdown = 0;
1955   newsk->ack_backlog = 0;
1956   newsk->acked_seq = skb->h.th->seq+1;
1957   newsk->fin_seq = skb->h.th->seq;
1958   newsk->copied_seq = skb->h.th->seq;
1959   newsk->state = TCP_SYN_RECV;
1960   newsk->timeout = 0;
1961   newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
1962   newsk->window_seq = newsk->write_seq;
1963   newsk->rcv_ack_seq = newsk->write_seq;
1964   newsk->urg_data = 0;
1965   newsk->retransmits = 0;
1966   newsk->destroy = 0;
1967   newsk->timer.data = (unsigned long)newsk;
1968   newsk->timer.function = &net_timer;
1969   newsk->dummy_th.source = skb->h.th->dest;
1970   newsk->dummy_th.dest = skb->h.th->source;
1971 
1972   /* Swap these two, they are from our point of view. */
1973   newsk->daddr = saddr;
1974   newsk->saddr = daddr;
1975 
1976   put_sock(newsk->num,newsk);
1977   newsk->dummy_th.res1 = 0;
1978   newsk->dummy_th.doff = 6;
1979   newsk->dummy_th.fin = 0;
1980   newsk->dummy_th.syn = 0;
1981   newsk->dummy_th.rst = 0;
1982   newsk->dummy_th.psh = 0;
1983   newsk->dummy_th.ack = 0;
1984   newsk->dummy_th.urg = 0;
1985   newsk->dummy_th.res2 = 0;
1986   newsk->acked_seq = skb->h.th->seq + 1;
1987   newsk->copied_seq = skb->h.th->seq;
1988 
1989   /* Grab the ttl and tos values and use them */
1990   newsk->ip_ttl=sk->ip_ttl;
1991   newsk->ip_tos=skb->ip_hdr->tos;
1992 
1993 /* use 512 or whatever user asked for */
1994 /* note use of sk->user_mss, since user has no direct access to newsk */
1995   if (sk->user_mss)
1996     newsk->mtu = sk->user_mss;
1997   else {
1998 #ifdef CONFIG_INET_SNARL        /* Sub Nets ARe Local */
1999     if ((saddr ^ daddr) & default_mask(saddr))
2000 #else
2001     if ((saddr ^ daddr) & dev->pa_mask)
2002 #endif
2003       newsk->mtu = 576 - HEADER_SIZE;
2004     else
2005       newsk->mtu = MAX_WINDOW;
2006   }
2007 /* but not bigger than device MTU */
2008   newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2009 
2010 /* this will min with what arrived in the packet */
2011   tcp_options(newsk,skb->h.th);
2012 
2013   buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2014   if (buff == NULL) {
2015         sk->err = -ENOMEM;
2016         newsk->dead = 1;
2017         release_sock(newsk);
2018         kfree_skb(skb, FREE_READ);
2019         tcp_statistics.TcpAttemptFails++;
2020         return;
2021   }
2022   
2023   buff->len = sizeof(struct tcphdr)+4;
2024   buff->sk = newsk;
2025   buff->localroute = newsk->localroute;
2026     
2027   t1 =(struct tcphdr *) buff->data;
2028 
2029   /* Put in the IP header and routing stuff. */
2030   tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2031                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2032 
2033   /* Something went wrong. */
2034   if (tmp < 0) {
2035         sk->err = tmp;
2036         buff->free=1;
2037         kfree_skb(buff,FREE_WRITE);
2038         newsk->dead = 1;
2039         release_sock(newsk);
2040         skb->sk = sk;
2041         kfree_skb(skb, FREE_READ);
2042         tcp_statistics.TcpAttemptFails++;
2043         return;
2044   }
2045 
2046   buff->len += tmp;
2047   t1 =(struct tcphdr *)((char *)t1 +tmp);
2048   
2049   memcpy(t1, skb->h.th, sizeof(*t1));
2050   buff->h.seq = newsk->write_seq;
2051 
2052   /* Swap the send and the receive. */
2053   t1->dest = skb->h.th->source;
2054   t1->source = newsk->dummy_th.source;
2055   t1->seq = ntohl(newsk->write_seq++);
2056   t1->ack = 1;
2057   newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2058   newsk->sent_seq = newsk->write_seq;
2059   t1->window = ntohs(newsk->window);
2060   t1->res1 = 0;
2061   t1->res2 = 0;
2062   t1->rst = 0;
2063   t1->urg = 0;
2064   t1->psh = 0;
2065   t1->syn = 1;
2066   t1->ack_seq = ntohl(skb->h.th->seq+1);
2067   t1->doff = sizeof(*t1)/4+1;
2068 
2069   ptr =(unsigned char *)(t1+1);
2070   ptr[0] = 2;
2071   ptr[1] = 4;
2072   ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2073   ptr[3] =(newsk->mtu) & 0xff;
2074 
2075   tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2076   newsk->prot->queue_xmit(newsk, dev, buff, 0);
2077 
2078   reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_CONNECT_TIME);
2079   skb->sk = newsk;
2080 
2081   /* Charge the sock_buff to newsk. */
2082   sk->rmem_alloc -= skb->mem_len;
2083   newsk->rmem_alloc += skb->mem_len;
2084 
2085   skb_queue_tail(&sk->receive_queue,skb);
2086   sk->ack_backlog++;
2087   release_sock(newsk);
2088   tcp_statistics.TcpOutSegs++;
2089 }
2090 
2091 
2092 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2093 {
2094         struct sk_buff *buff;
2095         int need_reset = 0;
2096         struct tcphdr *t1, *th;
2097         struct proto *prot;
2098         struct device *dev=NULL;
2099         int tmp;
2100 
2101         /*
2102          * We need to grab some memory, and put together a FIN, 
2103          * and then put it into the queue to be sent.
2104          */
2105         DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));
2106         sk->inuse = 1;
2107         sk->keepopen = 1;
2108         sk->shutdown = SHUTDOWN_MASK;
2109 
2110         if (!sk->dead) 
2111                 sk->state_change(sk);
2112 
2113         /*
2114          *      We need to flush the recv. buffs. 
2115          */
2116 
2117         if (skb_peek(&sk->receive_queue) != NULL) 
2118         {
2119                 struct sk_buff *skb;
2120                 if(sk->debug)
2121                         printk("Clean rcv queue\n");
2122                 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2123                 {
2124                         if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
2125                                 need_reset = 1;
2126                         kfree_skb(skb, FREE_READ);
2127                 }
2128                 if(sk->debug)
2129                         printk("Cleaned.\n");
2130         }
2131 
2132         /*
2133          *      Get rid off any half-completed packets. 
2134          */
2135          
2136         if (sk->partial) 
2137         {
2138                 tcp_send_partial(sk);
2139         }
2140 
2141         switch(sk->state) 
2142         {
2143                 case TCP_FIN_WAIT1:
2144                 case TCP_FIN_WAIT2:
2145                 case TCP_CLOSING:
2146                         /*
2147                          * These states occur when we have already closed out
2148                          * our end.  If there is no timeout, we do not do
2149                          * anything.  We may still be in the middle of sending
2150                          * the remainder of our buffer, for example...
2151                          * resetting the timer would be inappropriate.
2152                          *
2153                          * XXX if retransmit count reaches limit, is tcp_close()
2154                          * called with timeout == 1 ? if not, we need to fix that.
2155                          */
2156 #ifdef NOTDEF
2157                         /* 
2158                          *      Start a timer.
2159                          * original code was 4 * sk->rtt.  In converting to the
2160                          * new rtt representation, we can't quite use that.
2161                          * it seems to make most sense to  use the backed off value
2162                          */
2163                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2164 #endif
2165                         if (timeout) 
2166                                 tcp_time_wait(sk);
2167                         release_sock(sk);
2168                         return; /* break causes a double release - messy */
2169                 case TCP_TIME_WAIT:
2170                 case TCP_LAST_ACK:
2171                         /*
2172                          * A timeout from these states terminates the TCB.
2173                          */
2174                         if (timeout) 
2175                         {
2176                                 sk->state = TCP_CLOSE;
2177                         }
2178                         release_sock(sk);
2179                         return;
2180                 case TCP_LISTEN:
2181                         sk->state = TCP_CLOSE;
2182                         release_sock(sk);
2183                         return;
2184                 case TCP_CLOSE:
2185                         release_sock(sk);
2186                         return;
2187                 case TCP_CLOSE_WAIT:
2188                 case TCP_ESTABLISHED:
2189                 case TCP_SYN_SENT:
2190                 case TCP_SYN_RECV:
2191                         prot =(struct proto *)sk->prot;
2192                         th =(struct tcphdr *)&sk->dummy_th;
2193                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2194                         if (buff == NULL) 
2195                         {
2196                                 /* This will force it to try again later. */
2197                                 /* Or it would have if someone released the socket
2198                                    first. Anyway it might work now */
2199                                 release_sock(sk);
2200                                 if (sk->state != TCP_CLOSE_WAIT)
2201                                         sk->state = TCP_ESTABLISHED;
2202                                 reset_timer(sk, TIME_CLOSE, 100);
2203                                 return;
2204                         }
2205                         buff->sk = sk;
2206                         buff->free = 1;
2207                         buff->len = sizeof(*t1);
2208                         buff->localroute = sk->localroute;
2209                         t1 =(struct tcphdr *) buff->data;
2210         
2211                         /*
2212                          *      Put in the IP header and routing stuff. 
2213                          */
2214                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2215                                          IPPROTO_TCP, sk->opt,
2216                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2217                         if (tmp < 0) 
2218                         {
2219                                 kfree_skb(buff,FREE_WRITE);
2220 
2221                                 /*
2222                                  * Enter FIN_WAIT1 to await completion of
2223                                  * written out data and ACK to our FIN.
2224                                  */
2225 
2226                                 if(sk->state==TCP_ESTABLISHED)
2227                                         sk->state=TCP_FIN_WAIT1;
2228                                 else
2229                                         sk->state=TCP_FIN_WAIT2;
2230                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2231                                 if(timeout)
2232                                         tcp_time_wait(sk);
2233 
2234                                 DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
2235                                 release_sock(sk);
2236                                 return;
2237                         }
2238 
2239                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2240                         buff->len += tmp;
2241                         buff->dev = dev;
2242                         memcpy(t1, th, sizeof(*t1));
2243                         t1->seq = ntohl(sk->write_seq);
2244                         sk->write_seq++;
2245                         buff->h.seq = sk->write_seq;
2246                         t1->ack = 1;
2247         
2248                         /* 
2249                          *      Ack everything immediately from now on. 
2250                          */
2251 
2252                         sk->delay_acks = 0;
2253                         t1->ack_seq = ntohl(sk->acked_seq);
2254                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2255                         t1->fin = 1;
2256                         t1->rst = need_reset;
2257                         t1->doff = sizeof(*t1)/4;
2258                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2259 
2260                         tcp_statistics.TcpOutSegs++;
2261         
2262                         if (skb_peek(&sk->write_queue) == NULL) 
2263                         {
2264                                 sk->sent_seq = sk->write_seq;
2265                                 prot->queue_xmit(sk, dev, buff, 0);
2266                         } 
2267                         else 
2268                         {
2269                                 reset_timer(sk, TIME_WRITE, sk->rto);
2270                                 if (buff->next != NULL) 
2271                                 {
2272                                         printk("tcp_close: next != NULL\n");
2273                                         skb_unlink(buff);
2274                                 }
2275                                 skb_queue_tail(&sk->write_queue, buff);
2276                         }
2277 
2278                         /*
2279                          * If established (normal close), enter FIN_WAIT1.
2280                          * If in CLOSE_WAIT, enter LAST_ACK
2281                          * If in CLOSING, remain in CLOSING
2282                          * otherwise enter FIN_WAIT2
2283                          */
2284 
2285                         if (sk->state == TCP_ESTABLISHED)
2286                             sk->state = TCP_FIN_WAIT1;
2287                         else if (sk->state == TCP_CLOSE_WAIT)
2288                             sk->state = TCP_LAST_ACK;
2289                         else if (sk->state != TCP_CLOSING)
2290                             sk->state = TCP_FIN_WAIT2;
2291         }
2292         release_sock(sk);
2293 }
2294 
2295 
2296 /*
2297  * This routine takes stuff off of the write queue,
2298  * and puts it in the xmit queue.
2299  */
2300 static void
2301 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2302 {
2303   struct sk_buff *skb;
2304 
2305   DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));
2306 
2307   /* The bytes will have to remain here. In time closedown will
2308      empty the write queue and all will be happy */
2309   if(sk->zapped)
2310         return;
2311 
2312   while((skb = skb_peek(&sk->write_queue)) != NULL &&
2313         before(skb->h.seq, sk->window_seq + 1) &&
2314         (sk->retransmits == 0 ||
2315          sk->timeout != TIME_WRITE ||
2316          before(skb->h.seq, sk->rcv_ack_seq + 1))
2317         && sk->packets_out < sk->cong_window) {
2318                 IS_SKB(skb);
2319                 skb_unlink(skb);
2320                 DPRINTF((DBG_TCP, "Sending a packet.\n"));
2321 
2322                 /* See if we really need to send the packet. */
2323                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) {
2324                         sk->retransmits = 0;
2325                         kfree_skb(skb, FREE_WRITE);
2326                         if (!sk->dead) sk->write_space(sk);
2327                 } else {
2328                         sk->sent_seq = skb->h.seq;
2329                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2330                 }
2331         }
2332 }
2333 
2334 
2335 /*
2336  * This routine sorts the send list, and resets the
2337  * sk->send_head and sk->send_tail pointers.
      *
      * The list is the singly linked chain starting at sk->send_head and
      * threaded through skb->link3.  It is rebuilt by insertion sort into
      * ascending skb->h.seq order (as defined by before()).  A buffer is
      * placed after any already-sorted buffers it is not before().
      *
      * On return sk->send_head is the first buffer of the sorted chain and
      * sk->send_tail the last one.  If the list was empty, sk->send_head is
      * set to NULL and sk->send_tail is left untouched.
      *
      * Two defects of the earlier version are fixed here:
      *   - the head-insert test compared the sequence number of the *next*
      *     unsorted buffer (skb2), which is NULL for the last buffer and
      *     the wrong key in any case; it now compares skb itself;
      *   - send_tail was overwritten on every head insert, even when the
      *     sorted chain already had a tail behind the new head.
2338  */
2339 void
2340 sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2341 {
2342   struct sk_buff *list = NULL;
2343   struct sk_buff *skb,*skb2,*skb3;
2344 
2345   for (skb = sk->send_head; skb != NULL; skb = skb2) {
             /* Remember the successor now; skb->link3 is rewritten below. */
2346         skb2 = skb->link3;
2347         if (list == NULL || before (skb->h.seq, list->h.seq)) {
                     /* skb becomes the new head of the sorted chain. */
2348                 skb->link3 = list;
                     /* It is also the tail only if the chain was empty. */
2349                 if (skb->link3 == NULL) sk->send_tail = skb;
2350                 list = skb;
2351         } else {
                     /*
                      * Walk to the last buffer that skb is not before()
                      * and link skb in right after it.
                      */
2352                 for (skb3 = list; ; skb3 = skb3->link3) {
2353                         if (skb3->link3 == NULL ||
2354                             before(skb->h.seq, skb3->link3->h.seq)) {
2355                                 skb->link3 = skb3->link3;
2356                                 skb3->link3 = skb;
2357                                 if (skb->link3 == NULL) sk->send_tail = skb;
2358                                 break;
2359                         }
2360                 }
2361         }
2362   }
2363   sk->send_head = list;
2364 }
2365   
2366 
2367 /*
2368  * This routine deals with incoming acks, but not outgoing ones.
      *
      *   sk    - socket the segment was received for
      *   th    - TCP header of the segment; ack_seq and window are read
      *           through ntohl()/ntohs()
      *   saddr - not referenced in this routine
      *   len   - compared with th->doff*4 to tell whether the segment
      *           carries anything beyond the header
      *
      * Returns 0 when the ack is rejected (it is after sk->sent_seq, or it
      * is outside the accepted range while the socket is neither
      * ESTABLISHED nor CLOSE_WAIT).  Returns 1 in every other case,
      * including a zapped socket and an out-of-range ack that is ignored.
      *
      * In order, the routine: tracks the largest window seen, handles a
      * shrunken window, runs the congestion window update, releases acked
      * buffers from the retransmit list (updating rtt/rto), pushes more
      * data from the write queue, manages the timers, advances the
      * closing states, and finally decides whether to retransmit.
2369  */
2370 
2371 static int
2372 tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2373 {
2374   unsigned long ack;
2375   int flag = 0;
2376   /* 
2377    * 1 - there was data in packet as well as ack or new data is sent or 
2378    *     in shutdown state
2379    * 2 - data from retransmit queue was acked and removed
2380    * 4 - window shrunk or data from retransmit queue was acked and removed
2381    */
2382 
2383   if(sk->zapped)
2384         return(1);      /* Dead, can't ack any more so why bother */
2385 
2386   ack = ntohl(th->ack_seq);
2387   DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
2388           "sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
2389           ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
2390 
       /* Track the largest window ever offered and derive mss from it. */
2391   if (ntohs(th->window) > sk->max_window) {
2392         sk->max_window = ntohs(th->window);
2393         sk->mss = min(sk->max_window, sk->mtu);
2394   }
2395 
2396   if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2397         sk->retransmits = 0;
2398 
       /*
        * Ack outside the accepted range.  It is rejected (return 0) when
        * it is after sent_seq or the state is neither ESTABLISHED nor
        * CLOSE_WAIT; otherwise it is ignored (return 1), re-arming the
        * keepopen timer first if keepopen is set.
        */
2399 /* not quite clear why the +1 and -1 here, and why not +1 in next line */
2400   if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
2401         if (after(ack, sk->sent_seq) ||
2402            (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
2403                 return(0);
2404         }
2405         if (sk->keepopen) {
2406                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2407         }
2408         return(1);
2409   }
2410 
       /* Segment is longer than its header: note it in flag bit 1. */
2411   if (len != th->doff*4) flag |= 1;
2412 
2413   /* See if our window has been shrunk. */
2414   if (after(sk->window_seq, ack+ntohs(th->window))) {
2415         /*
2416          * We may need to move packets from the send queue
2417          * to the write queue, if the window has been shrunk on us.
2418          * The RFC says you are not allowed to shrink your window
2419          * like this, but if the other end does, you must be able
2420          * to deal with it.
2421          */
2422         struct sk_buff *skb;
2423         struct sk_buff *skb2;
2424         struct sk_buff *wskb = NULL;
2425   
             /*
              * Detach the whole retransmit list; it is rebuilt below from
              * the buffers that still fit in the new window.
              */
2426         skb2 = sk->send_head;
2427         sk->send_head = NULL;
2428         sk->send_tail = NULL;
2429 
2430         flag |= 4;
2431 
2432         sk->window_seq = ack + ntohs(th->window);
2433         cli();
2434         while (skb2 != NULL) {
2435                 skb = skb2;
2436                 skb2 = skb->link3;
2437                 skb->link3 = NULL;
2438                 if (after(skb->h.seq, sk->window_seq)) {
                             /*
                              * Beyond the new window: no longer counted as
                              * outstanding, goes back on the write queue.
                              * wskb remembers the last buffer moved so the
                              * moved buffers keep their relative order at
                              * the front of the write queue.
                              */
2439                         if (sk->packets_out > 0) sk->packets_out--;
2440                         /* We may need to remove this from the dev send list. */
2441                         if (skb->next != NULL) {
2442                                 skb_unlink(skb);                                
2443                         }
2444                         /* Now add it to the write_queue. */
2445                         if (wskb == NULL)
2446                                 skb_queue_head(&sk->write_queue,skb);
2447                         else
2448                                 skb_append(wskb,skb);
2449                         wskb = skb;
2450                 } else {
                             /* Still inside the window: append to the rebuilt list. */
2451                         if (sk->send_head == NULL) {
2452                                 sk->send_head = skb;
2453                                 sk->send_tail = skb;
2454                         } else {
2455                                 sk->send_tail->link3 = skb;
2456                                 sk->send_tail = skb;
2457                         }
2458                         skb->link3 = NULL;
2459                 }
2460         }
2461         sti();
2462   }
2463 
       /*
        * If either end of the retransmit list is NULL the list is treated
        * as empty: both pointers and the outstanding count are cleared.
        */
2464   if (sk->send_tail == NULL || sk->send_head == NULL) {
2465         sk->send_head = NULL;
2466         sk->send_tail = NULL;
2467         sk->packets_out= 0;
2468   }
2469 
2470   sk->window_seq = ack + ntohs(th->window);
2471 
2472   /* We don't want too many packets out there. */
2473   if (sk->timeout == TIME_WRITE && 
2474       sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {
2475 /* 
2476  * This is Jacobson's slow start and congestion avoidance. 
2477  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2478  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2479  * counter and increment it once every cwnd times.  It's possible
2480  * that this should be done only if sk->retransmits == 0.  I'm
2481  * interpreting "new data is acked" as including data that has
2482  * been retransmitted but is just now being acked.
2483  */
2484         if (sk->cong_window < sk->ssthresh)  
2485           /* in "safe" area, increase */
2486           sk->cong_window++;
2487         else {
2488           /* in dangerous area, increase slowly.  In theory this is
2489              sk->cong_window += 1 / sk->cong_window
2490            */
2491           if (sk->cong_count >= sk->cong_window) {
2492             sk->cong_window++;
2493             sk->cong_count = 0;
2494           } else 
2495             sk->cong_count++;
2496         }
2497   }
2498 
2499   DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
2500   sk->rcv_ack_seq = ack;
2501 
2502   /*
2503    * if this ack opens up a zero window, clear backoff.  It was
2504    * being used to time the probes, and is probably far higher than
2505    * it needs to be for normal retransmission
2506    */
2507   if (sk->timeout == TIME_PROBE0) {
2508         if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2509             ! before (sk->window_seq, sk->write_queue.next->h.seq)) {
2510           sk->retransmits = 0;
2511           sk->backoff = 0;
2512           /* recompute rto from rtt.  this eliminates any backoff */
               /* The result is clamped to the range 1*HZ .. 120*HZ. */
2513           sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2514           if (sk->rto > 120*HZ)
2515             sk->rto = 120*HZ;
2516           if (sk->rto < 1*HZ)
2517             sk->rto = 1*HZ;
2518         }
2519   }
2520 
2521   /* See if we can take anything off of the retransmit queue. */
2522   while(sk->send_head != NULL) {
2523         /* Check for a bug. */
             /* Only the first two entries are compared on each pass. */
2524         if (sk->send_head->link3 &&
2525             after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
2526                 printk("INET: tcp.c: *** bug send_list out of order.\n");
2527                 sort_send(sk);
2528         }
2529 
             /* Head buffer is covered by this ack: release it. */
2530         if (before(sk->send_head->h.seq, ack+1)) {
2531                 struct sk_buff *oskb;
2532 
2533                 if (sk->retransmits) {
2534 
2535                   /* we were retransmitting.  don't count this in RTT est */
2536                   flag |= 2;
2537 
2538                   /*
2539                    * even though we've gotten an ack, we're still
2540                    * retransmitting as long as we're sending from
2541                    * the retransmit queue.  Keeping retransmits non-zero
2542                    * prevents us from getting new data interspersed with
2543                    * retransmissions.
2544                    */
2545 
2546                   if (sk->send_head->link3)
2547                     sk->retransmits = 1;
2548                   else
2549                     sk->retransmits = 0;
2550 
2551                 }
2552 
2553                 /*
2554                  * Note that we only reset backoff and rto in the
2555                  * rtt recomputation code.  And that doesn't happen
2556                  * if there were retransmissions in effect.  So the
2557                  * first new packet after the retransmissions is
2558                  * sent with the backoff still in effect.  Not until
2559                  * we get an ack from a non-retransmitted packet do
2560                  * we reset the backoff and rto.  This allows us to deal
2561                  * with a situation where the network delay has increased
2562                  * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2563                  */
2564 
2565                 /* We have one less packet out there. */
2566                 if (sk->packets_out > 0) sk->packets_out --;
2567                 DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
2568                                 sk->send_head, sk->send_head->h.seq, ack));
2569 
2570                 /* Wake up the process, it can probably write more. */
2571                 if (!sk->dead) sk->write_space(sk);
2572 
2573                 oskb = sk->send_head;
2574 
                     /*
                      * flag bit 2 is set once any buffer has been released
                      * in this call (see "flag |= (2|4)" below), so at most
                      * the first released buffer of a call feeds the rtt
                      * estimate, and none does while retransmitting.
                      */
2575                 if (!(flag&2)) {
2576                   long m;
2577 
2578                   /* The following amusing code comes from Jacobson's
2579                    * article in SIGCOMM '88.  Note that rtt and mdev
2580                    * are scaled versions of rtt and mean deviation.
2581                    * This is designed to be as fast as possible 
2582                    * m stands for "measurement".
2583                    */
2584 
2585                   m = jiffies - oskb->when;  /* RTT */
2586                   m -= (sk->rtt >> 3);       /* m is now error in rtt est */
2587                   sk->rtt += m;              /* rtt = 7/8 rtt + 1/8 new */
2588                   if (m < 0)
2589                     m = -m;                  /* m is now abs(error) */
2590                   m -= (sk->mdev >> 2);      /* similar update on mdev */
2591                   sk->mdev += m;             /* mdev = 3/4 mdev + 1/4 new */
2592 
2593                   /* now update timeout.  Note that this removes any backoff */
2594                   sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2595                   if (sk->rto > 120*HZ)
2596                     sk->rto = 120*HZ;
2597                   if (sk->rto < 1*HZ)
2598                     sk->rto = 1*HZ;
2599                   sk->backoff = 0;
2600 
2601                 }
2602                 flag |= (2|4);
2603 
2604                 cli();
2605 
                     /* Re-read the head with interrupts off before unlinking it. */
2606                 oskb = sk->send_head;
2607                 IS_SKB(oskb);
2608                 sk->send_head = oskb->link3;
2609                 if (sk->send_head == NULL) {
2610                         sk->send_tail = NULL;
2611                 }
2612 
2613                 /* We may need to remove this from the dev send list. */
2614                 if (oskb->next)
2615                         skb_unlink(oskb);
2616                 sti();
2617                 kfree_skb(oskb, FREE_WRITE); /* write. */
                     /* Second write_space() call for this buffer, now that it is freed. */
2618                 if (!sk->dead) sk->write_space(sk);
2619         } else {
2620                 break;
2621         }
2622   }
2623 
2624   /*
2625    * Maybe we can take some stuff off of the write queue,
2626    * and put it onto the xmit queue.
        *
        * The first test below repeats the loop condition of
        * tcp_write_xmit() for the head of the write queue.
2627    */
2628   if (skb_peek(&sk->write_queue) != NULL) {
2629         if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2630                 (sk->retransmits == 0 || 
2631                  sk->timeout != TIME_WRITE ||
2632                  before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2633                 && sk->packets_out < sk->cong_window) {
2634                 flag |= 1;
2635                 tcp_write_xmit(sk);
2636         } else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2637                    sk->send_head == NULL &&
2638                    sk->ack_backlog == 0 &&
2639                    sk->state != TCP_TIME_WAIT) {
                     /*
                      * Head of the write queue is beyond the window and
                      * nothing is outstanding: start the TIME_PROBE0 timer.
                      */
2640                 reset_timer(sk, TIME_PROBE0, sk->rto);
2641         }               
2642   } else {
2643         if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2644             sk->state != TCP_TIME_WAIT && !sk->keepopen) {
2645                 DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n")); 
2646                 if (!sk->dead) sk->write_space(sk);
2647 
                     /*
                      * NOTE(review): the enclosing condition requires
                      * !sk->keepopen, so the TIME_KEEPOPEN arm below
                      * cannot be taken here; only delete_timer() runs.
                      */
2648                 if (sk->keepopen)
2649                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2650                 else
2651                         delete_timer(sk);
2652         } else {
                     /*
                      * NOTE(review): this compares the connection state
                      * with the keepopen flag, which looks unintended -
                      * confirm what was meant before relying on it.
                      */
2653                 if (sk->state != (unsigned char) sk->keepopen) {
2654                         reset_timer(sk, TIME_WRITE, sk->rto);
2655                 }
                     /* In TIME_WAIT the close timer replaces whatever was set above. */
2656                 if (sk->state == TCP_TIME_WAIT) {
2657                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2658                 }
2659         }
2660   }
2661 
       /* Everything is drained: flush a pending partial packet, if any. */
2662   if (sk->packets_out == 0 && sk->partial != NULL &&
2663       skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) {
2664         flag |= 1;
2665         tcp_send_partial(sk);
2666   }
2667 
2668   /*
2669    * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2670    * we are now waiting for an acknowledge to our FIN.  The other end is
2671    * already in TIME_WAIT.
2672    *
2673    * Move to TCP_CLOSE on success.
2674    */
2675 
2676   if (sk->state == TCP_LAST_ACK) {
2677         if (!sk->dead)
2678                 sk->state_change(sk);
2679         DPRINTF((DBG_TCP, "TCP_LAST_ACK-A: %d/%d %d/%d ack/sent %d %d\n",
2680             sk->rcv_ack_seq,
2681             sk->write_seq,
2682             sk->acked_seq,
2683             sk->fin_seq,
2684             ack,
2685             sk->sent_seq
2686         ));
2687         if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) {
2688                 DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk));
2689                 flag |= 1;
2690                 sk->state = TCP_CLOSE;
2691                 sk->shutdown = SHUTDOWN_MASK;
2692         }
2693   }
2694 
2695   /*
2696    * Incoming ACK to a FIN we sent in the case of our initiating the close.
2697    *
2698    * Move to FIN_WAIT2 to await a FIN from the other end.
        *
        * As coded: once everything we wrote is acked, the socket goes to
        * FIN_WAIT2 when acked_seq equals fin_seq, and through
        * tcp_time_wait() otherwise.
2699    */
2700 
2701   if (sk->state == TCP_FIN_WAIT1) {
2702         if (!sk->dead) 
2703                 sk->state_change(sk);
2704         if (sk->rcv_ack_seq == sk->write_seq) {
2705                 flag |= 1;
2706                 if (sk->acked_seq != sk->fin_seq) {
2707                         tcp_time_wait(sk);
2708                 } else {
2709                         sk->shutdown = SHUTDOWN_MASK;
2710                         sk->state = TCP_FIN_WAIT2;
2711                 }
2712         }
2713   }
2714 
2715   /*
2716    * Incoming ACK to a FIN we sent in the case of a simultaneous close.
2717    *
2718    * Move to TIME_WAIT
2719    */
2720 
2721   if (sk->state == TCP_CLOSING) {
2722         if (!sk->dead) 
2723                 sk->state_change(sk);
2724         if (sk->rcv_ack_seq == sk->write_seq) {
2725                 flag |= 1;
2726                 tcp_time_wait(sk);
2727         }
2728   }
2729 
2730 /*
2731  * I make no guarantees about the first clause in the following
2732  * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2733  * what conditions "!flag" would be true.  However I think the rest
2734  * of the conditions would prevent that from causing any
2735  * unnecessary retransmission. 
2736  *   Clearly if the first packet has expired it should be 
2737  * retransmitted.  The other alternative, "flag&2 && retransmits", is
2738  * harder to explain:  You have to look carefully at how and when the
2739  * timer is set and with what timeout.  The most recent transmission always
2740  * sets the timer.  So in general if the most recent thing has timed
2741  * out, everything before it has as well.  So we want to go ahead and
2742  * retransmit some more.  If we didn't explicitly test for this
2743  * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2744  * would not be true.  If you look at the pattern of timing, you can
2745  * show that rto is increased fast enough that the next packet would
2746  * almost never be retransmitted immediately.  Then you'd end up
2747  * waiting for a timeout to send each packet on the retransmission
2748  * queue.  With my implementation of the Karn sampling algorithm,
2749  * the timeout would double each time.  The net result is that it would
2750  * take a hideous amount of time to recover from a single dropped packet.
2751  * It's possible that there should also be a test for TIME_WRITE, but
2752  * I think as long as "send_head != NULL" and "retransmit" is on, we've
2753  * got to be in real retransmission mode.
2754  *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2755  * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2756  * As long as no further losses occur, this seems reasonable.
2757  */
2758 
2759   if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2760       (((flag&2) && sk->retransmits) ||
2761        (sk->send_head->when + sk->rto < jiffies))) {
2762         ip_do_retransmit(sk, 1);
2763         reset_timer(sk, TIME_WRITE, sk->rto);
2764       }
2765 
2766   DPRINTF((DBG_TCP, "leaving tcp_ack\n"));
2767   return(1);
2768 }
2769 
2770 
2771 /*
2772  * This routine handles the data.  If there is room in the buffer,
2773  * it will have already been moved into it.  If there is no
2774  * room, then we will just have to discard the packet.
      *
      *   skb   - received buffer; skb->h.th is its TCP header
      *   sk    - socket the segment belongs to
      *   saddr - passed on to tcp_send_ack()
      *   len   - segment length including the TCP header; skb->len is
      *           set to len minus th->doff*4
      *
      * Always returns 0.  The buffer is either freed here (empty segment,
      * or receive side already shut down) or linked into
      * sk->receive_queue.
      *
      * Side effect on the header: th->ack_seq is overwritten with the
      * sequence number just past this segment (seq + payload length,
      * plus one each for SYN and FIN) and is used in that sense below.
2775  */
2776 static int
2777 tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
2778          unsigned long saddr, unsigned short len)
2779 {
2780   struct sk_buff *skb1, *skb2;
2781   struct tcphdr *th;
2782   int dup_dumped=0;
2783 
2784   th = skb->h.th;
2785   print_th(th);
2786   skb->len = len -(th->doff*4);
2787 
2788   DPRINTF((DBG_TCP, "tcp_data len = %d sk = %X:\n", skb->len, sk));
2789 
2790   sk->bytes_rcv += skb->len;
       /* Nothing to queue: no payload and none of FIN, URG or PSH set. */
2791   if (skb->len == 0 && !th->fin && !th->urg && !th->psh) {
2792         /* Don't want to keep passing ack's back and forth. */
2793         if (!th->ack) tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
2794         kfree_skb(skb, FREE_READ);
2795         return(0);
2796   }
2797 
       /*
        * Data arriving after the receive side was shut down: answer with
        * a reset, mark the socket closed with EPIPE and drop the buffer.
        */
2798   if (sk->shutdown & RCV_SHUTDOWN) {
2799         sk->acked_seq = th->seq + skb->len + th->syn + th->fin;
2800         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2801                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2802         tcp_statistics.TcpEstabResets++;
2803         sk->state = TCP_CLOSE;
2804         sk->err = EPIPE;
2805         sk->shutdown = SHUTDOWN_MASK;
2806         DPRINTF((DBG_TCP, "tcp_data: closing socket - %X\n", sk));
2807         kfree_skb(skb, FREE_READ);
2808         if (!sk->dead) sk->state_change(sk);
2809         return(0);
2810   }
2811 
2812   /*
2813    * Now we have to walk the chain, and figure out where this one
2814    * goes into it.  This is set up so that the last packet we received
2815    * will be the first one we look at, that way if everything comes
2816    * in order, there will be no performance loss, and if they come
2817    * out of order we will be able to fit things in nicely.
        *
        * After this step skb1 is:
        *   - NULL if the queue was empty or skb replaced a duplicate
        *     (dup_dumped is set in the latter case),
        *   - the buffer skb was appended after, or
        *   - the previous queue head, when skb was put in front of it.
2818    */
2819 
2820   /* This should start at the last one, and then go around forwards. */
2821   if (skb_peek(&sk->receive_queue) == NULL) {
2822         DPRINTF((DBG_TCP, "tcp_data: skb = %X:\n", skb));
2823         skb_queue_head(&sk->receive_queue,skb);
2824         skb1= NULL;
2825   } else {
2826         DPRINTF((DBG_TCP, "tcp_data adding to chain sk = %X:\n", sk));
             /* Walk backwards from the tail of the receive queue. */
2827         for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) {
2828                 if(sk->debug)
2829                 {
2830                         printk("skb1=%p :", skb1);
2831                         printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
2832                         printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
2833                         printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
2834                                         sk->acked_seq);
2835                 }
                     /*
                      * Same starting sequence and at least as long: the
                      * new buffer takes the place of the queued one.
                      */
2836                 if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
2837                 {
2838                         skb_append(skb1,skb);
2839                         skb_unlink(skb1);
2840                         kfree_skb(skb1,FREE_READ);
2841                         dup_dumped=1;
2842                         skb1=NULL;
2843                         break;
2844                 }
                     /* New segment does not start before skb1: queue it after skb1. */
2845                 if (after(th->seq+1, skb1->h.th->seq))
2846                 {
2847                         skb_append(skb1,skb);
2848                         break;
2849                 }
                     /* Reached the front without a match: skb becomes the head. */
2850                 if (skb1 == skb_peek(&sk->receive_queue))
2851                 {
2852                         skb_queue_head(&sk->receive_queue, skb);
2853                         break;
2854                 }
2855         }
2856         DPRINTF((DBG_TCP, "skb = %X:\n", skb));
2857   }
2858 
       /* Reuse th->ack_seq to hold the sequence number just past this segment. */
2859   th->ack_seq = th->seq + skb->len;
2860   if (th->syn) th->ack_seq++;
2861   if (th->fin) th->ack_seq++;
2862 
2863   if (before(sk->acked_seq, sk->copied_seq)) {
2864         printk("*** tcp.c:tcp_data bug acked < copied\n");
2865         sk->acked_seq = sk->copied_seq;
2866   }
2867 
2868   /* Now figure out if we can ack anything. */
2869   if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) {
           /* Segment starts at or before what has been acked so far. */
2870       if (before(th->seq, sk->acked_seq+1)) {
2871                 int newwindow;
2872 
                     /*
                      * Segment extends past acked_seq: advance acked_seq
                      * to its end and shrink sk->window by the same
                      * amount, never below zero.
                      */
2873                 if (after(th->ack_seq, sk->acked_seq)) {
2874                         newwindow = sk->window -
2875                                        (th->ack_seq - sk->acked_seq);
2876                         if (newwindow < 0)
2877                                 newwindow = 0;  
2878                         sk->window = newwindow;
2879                         sk->acked_seq = th->ack_seq;
2880                 }
2881                 skb->acked = 1;
2882 
2883                 /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */
2884                 if (skb->h.th->fin) {
2885                         if (!sk->dead) sk->state_change(sk);
2886                         sk->shutdown |= RCV_SHUTDOWN;
2887                 }
2888           
                     /*
                      * Buffers queued after this one may have become
                      * contiguous; absorb them the same way and stop at
                      * the first one that still leaves a gap.
                      */
2889                 for(skb2 = skb->next;
2890                     skb2 != (struct sk_buff *)&sk->receive_queue;
2891                     skb2 = skb2->next) {
2892                         if (before(skb2->h.th->seq, sk->acked_seq+1)) {
2893                                 if (after(skb2->h.th->ack_seq, sk->acked_seq))
2894                                 {
2895                                         newwindow = sk->window -
2896                                          (skb2->h.th->ack_seq - sk->acked_seq);
2897                                         if (newwindow < 0)
2898                                                 newwindow = 0;  
2899                                         sk->window = newwindow;
2900                                         sk->acked_seq = skb2->h.th->ack_seq;
2901                                 }
2902                                 skb2->acked = 1;
2903 
2904                                 /*
2905                                  * When we ack the fin, we turn on
2906                                  * the RCV_SHUTDOWN flag.
2907                                  */
2908                                 if (skb2->h.th->fin) {
2909                                         sk->shutdown |= RCV_SHUTDOWN;
2910                                         if (!sk->dead) sk->state_change(sk);
2911                                 }
2912 
2913                                 /* Force an immediate ack. */
2914                                 sk->ack_backlog = sk->max_ack_backlog;
2915                         } else {
2916                                 break;
2917                         }
2918                 }
2919 
2920                 /*
2921                  * This also takes care of updating the window.
2922                  * This if statement needs to be simplified.
                      *
                      * The "ack now" arm is empty because its
                      * tcp_send_ack() call is commented out.
                      * NOTE(review): skb->acked was set above, so the
                      * final else of this routine calls tcp_send_ack()
                      * on this path in either arm; the ack therefore
                      * does not look delayed even when it is queued
                      * here - confirm.
2923                  */
2924                 if (!sk->delay_acks ||
2925                     sk->ack_backlog >= sk->max_ack_backlog || 
2926                     sk->bytes_rcv > sk->max_unacked || th->fin) {
2927 /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
2928                 } else {
2929                         sk->ack_backlog++;
2930                         if(sk->debug)
2931                                 printk("Ack queued.\n");
2932                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2933                 }
2934         }
2935   }
2936 
2937   /*
2938    * If we've missed a packet, send an ack.
2939    * Also start a timer to send another.
        *
        * "Missed" here means skb->acked was not set above, i.e. this
        * segment could not be acknowledged.
2940    */
2941   if (!skb->acked) {
2942         /*
2943          * This is important.  If we don't have much room left,
2944          * we need to throw out a few packets so we have a good
2945          * window.  Note that mtu is used, not mss, because mss is really
2946          * for the send side.  He could be sending us stuff as large as mtu.
2947          */
2948         while (sk->prot->rspace(sk) < sk->mtu) {
2949                 skb1 = skb_peek(&sk->receive_queue);
2950                 if (skb1 == NULL) {
2951                         printk("INET: tcp.c:tcp_data memory leak detected.\n");
2952                         break;
2953                 }
2954 
2955                 /* Don't throw out something that has been acked. */
2956                 if (skb1->acked) {
2957                         break;
2958                 }
2959                 
2960                 skb_unlink(skb1);
2961                 kfree_skb(skb1, FREE_READ);
2962         }
2963         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
2964         sk->ack_backlog++;
2965         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2966   } else {
2967         /* We missed a packet.  Send an ack to try to resync things. */
             /*
              * NOTE(review): this arm runs when skb->acked is set, so the
              * wording above looks inherited from elsewhere; as coded this
              * is the ack for a segment that was accepted.
              */
2968         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
2969   }
2970 
2971   /* Now tell the user we may have some data. */
2972   if (!sk->dead) {
2973         if(sk->debug)
2974                 printk("Data wakeup.\n");
2975         sk->data_ready(sk,0);
2976   } else {
2977         DPRINTF((DBG_TCP, "data received on dead socket.\n"));
2978   }
2979 
       /* Compiled only if NOTDEF is defined. */
2980 #ifdef NOTDEF   /* say what?  this is handled by tcp_ack() */
2981 
2982   if (sk->state == TCP_FIN_WAIT2 &&
2983       sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->write_seq) {
2984         DPRINTF((DBG_TCP, "tcp_data: entering last_ack state sk = %X\n", sk));
2985 
2986 /*      tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); */
2987         sk->shutdown = SHUTDOWN_MASK;
2988         sk->state = TCP_LAST_ACK;
2989         if (!sk->dead) sk->state_change(sk);
2990   }
2991 #endif
2992 
2993   return(0);
2994 }
2995 
2996 
     /*
      * Record a newly announced urgent pointer for the socket.
      *
      * The urgent sequence number is computed as th->seq plus
      * ntohs(th->urg_ptr), minus one when the pointer is non-zero.
      * Nothing happens if that position is not beyond what has been
      * copied (sk->copied_seq), or if urgent data is already pending at
      * the same or a later position.  Otherwise the owner recorded in
      * sk->proc, if any, is sent SIGURG and the socket is marked
      * URG_NOTYET with sk->urg_seq set to the new position.
      */
2997 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
2998 {
2999         unsigned long ptr = ntohs(th->urg_ptr);
3000 
             /* A non-zero pointer is taken back by one before being made absolute. */
3001         if (ptr)
3002                 ptr--;
3003         ptr += th->seq;
3004 
3005         /* ignore urgent data that we've already seen and read */
3006         if (after(sk->copied_seq+1, ptr))
3007                 return;
3008 
3009         /* do we already have a newer (or duplicate) urgent pointer? */
3010         if (sk->urg_data && !after(ptr, sk->urg_seq))
3011                 return;
3012 
3013         /* tell the world about our new urgent pointer */
             /* positive sk->proc: one process; negative: process group -sk->proc */
3014         if (sk->proc != 0) {
3015                 if (sk->proc > 0) {
3016                         kill_proc(sk->proc, SIGURG, 1);
3017                 } else {
3018                         kill_pg(-sk->proc, SIGURG, 1);
3019                 }
3020         }
             /* The urgent byte itself is picked up later, in tcp_urg(). */
3021         sk->urg_data = URG_NOTYET;
3022         sk->urg_seq = ptr;
3023 }
3024 
     /*
      * Handle the urgent-data side of a received segment.
      *
      *   sk    - socket the segment belongs to
      *   th    - TCP header of the segment
      *   saddr - not referenced in this routine
      *   len   - compared against an offset measured from the start of
      *           the TCP header, so it is presumably the segment length
      *           including that header - confirm against the caller
      *
      * If the segment has URG set, the urgent pointer is processed by
      * tcp_check_urg().  Then, if the socket is still waiting for its
      * urgent byte (URG_NOTYET) and that byte lies inside this segment,
      * the byte is stored in sk->urg_data together with URG_VALID and a
      * live owner is notified through data_ready().
      *
      * Always returns 0.
      */
3025 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3026         unsigned long saddr, unsigned long len)
3027 {
3028         unsigned long ptr;
3029 
3030         /* check if we get a new urgent pointer */
3031         if (th->urg)
3032                 tcp_check_urg(sk,th);
3033 
3034         /* do we wait for any urgent data? */
3035         if (sk->urg_data != URG_NOTYET)
3036                 return 0;
3037 
3038         /* is the urgent pointer pointing into this packet? */
             /*
              * ptr is the offset of the urgent byte from the start of the
              * TCP header.  It is unsigned, so an urgent position before
              * th->seq wraps to a huge value and fails the range test too.
              */
3039         ptr = sk->urg_seq - th->seq + th->doff*4;
3040         if (ptr >= len)
3041                 return 0;
3042 
3043         /* ok, got the correct packet, update info */
3044         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3045         if (!sk->dead)
3046                 sk->data_ready(sk,0);
3047         return 0;
3048 }
3049 
3050 
3051 /*
3052  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3053  *
3054  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3055  *  (and thence onto LAST-ACK and finally, CLOSED, we never enter
3056  *  TIME-WAIT)
3057  *
3058  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3059  *  close and we go into CLOSING (and later onto TIME-WAIT)
3060  *
3061  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3062  *
      *  sk->fin_seq is first set to the sequence number just past the
      *  segment (seq + skb->len, plus one each for SYN and FIN) and a
      *  live owner is notified through state_change().
      *
      *  saddr and dev are only used in the debug print.
      *
      *  Always returns 0.  sk->ack_backlog is incremented for every
      *  state except TIME_WAIT and the default case, which return early.
3063  */
3064  
3065 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3066          unsigned long saddr, struct device *dev)
3067 {
3068         DPRINTF((DBG_TCP, "tcp_fin(sk=%X, th=%X, saddr=%X, dev=%X)\n",
3069                                         sk, th, saddr, dev));
3070   
3071         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3072 
3073         if (!sk->dead) 
3074         {
3075                 sk->state_change(sk);
3076         }
3077 
3078         switch(sk->state) 
3079         {
3080                 case TCP_SYN_RECV:
3081                 case TCP_SYN_SENT:
3082                 case TCP_ESTABLISHED:
3083                         /*
3084                          * move to CLOSE_WAIT, tcp_data() already handled
3085                          * sending the ack.
                              *
                              * NOTE(review): TcpCurrEstab is decremented for
                              * SYN_RECV and SYN_SENT as well - confirm those
                              * states were counted as established.
3086                          */
3087                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3088                         /*sk->fin_seq = th->seq+1;*/
3089                         tcp_statistics.TcpCurrEstab--;
3090                         sk->state = TCP_CLOSE_WAIT;
                             /* A FIN that also carries RST shuts down both directions. */
3091                         if (th->rst)
3092                                 sk->shutdown = SHUTDOWN_MASK;
3093                         break;
3094 
3095                 case TCP_CLOSE_WAIT:
3096                 case TCP_CLOSING:
3097                         /*
3098                          * received a retransmission of the FIN, do
3099                          * nothing.
3100                          */
3101                         break;
3102                 case TCP_TIME_WAIT:
3103                         /*
3104                          * received a retransmission of the FIN,
3105                          * restart the TIME_WAIT timer.
3106                          */
3107                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3108                         return(0);
3109                 case TCP_FIN_WAIT1:
3110                         /*
3111                          * This case occurs when a simultaneous close
3112                          * happens, we must ack the received FIN and
3113                          * enter the CLOSING state.
3114                          *
3115                          * XXX timeout not set properly
3116                          */
3117 
3118                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3119                         /*sk->fin_seq = th->seq+1;*/
3120                         sk->state = TCP_CLOSING;
3121                         break;
3122                 case TCP_FIN_WAIT2:
3123                         /*
3124                          * received a FIN -- send ACK and enter TIME_WAIT
3125                          */
3126                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3127                         /*sk->fin_seq = th->seq+1;*/
3128                         sk->state = TCP_TIME_WAIT;
3129                         break;
3130                 case TCP_CLOSE:
3131                         /*
3132                          * already in CLOSE
3133                          */
3134                         break;
3135                 default:
                             /* Any state not listed above ends up in LAST_ACK. */
3136                         sk->state = TCP_LAST_ACK;
3137         
3138                         /* Start the timers. */
3139                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3140                         return(0);
3141         }
             /* Count one more ack owed to the peer for the cases that broke out above. */
3142         sk->ack_backlog++;
3143 
3144         return(0);
3145 }
3146 
3147 
3148 /* This will accept the next outstanding connection. */
3149 static struct sock *
3150 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3151 {
3152   struct sock *newsk;
3153   struct sk_buff *skb;
3154   
3155   DPRINTF((DBG_TCP, "tcp_accept(sk=%X, flags=%X, addr=%s)\n",
3156                                 sk, flags, in_ntoa(sk->saddr)));
3157 
3158   /*
3159    * We need to make sure that this socket is listening,
3160    * and that it has something pending.
3161    */
3162   if (sk->state != TCP_LISTEN) {
3163         sk->err = EINVAL;
3164         return(NULL); 
3165   }
3166 
3167   /* avoid the race. */
3168   cli();
3169   sk->inuse = 1;
3170   while((skb = skb_dequeue(&sk->receive_queue)) == NULL) {
3171         if (flags & O_NONBLOCK) {
3172                 sti();
3173                 release_sock(sk);
3174                 sk->err = EAGAIN;
3175                 return(NULL);
3176         }
3177 
3178         release_sock(sk);
3179         interruptible_sleep_on(sk->sleep);
3180         if (current->signal & ~current->blocked) {
3181                 sti();
3182                 sk->err = ERESTARTSYS;
3183                 return(NULL);
3184         }
3185         sk->inuse = 1;
3186   }
3187   sti();
3188 
3189   /* Now all we need to do is return skb->sk. */
3190   newsk = skb->sk;
3191 
3192   kfree_skb(skb, FREE_READ);
3193   sk->ack_backlog--;
3194   release_sock(sk);
3195   return(newsk);
3196 }
3197 
3198 
3199 /*
3200  *      This will initiate an outgoing connection. 
3201  */
3202  
3203 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3204 {
3205         struct sk_buff *buff;
3206         struct sockaddr_in sin;
3207         struct device *dev=NULL;
3208         unsigned char *ptr;
3209         int tmp;
3210         struct tcphdr *t1;
3211         int err;
3212 
3213         if (sk->state != TCP_CLOSE) 
3214                 return(-EISCONN);
3215         if (addr_len < 8) 
3216                 return(-EINVAL);
3217 
3218         err=verify_area(VERIFY_READ, usin, addr_len);
3219         if(err)
3220                 return err;
3221         
3222         memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len));
3223 
3224         if (sin.sin_family && sin.sin_family != AF_INET) 
3225                 return(-EAFNOSUPPORT);
3226 
3227         DPRINTF((DBG_TCP, "TCP connect daddr=%s\n", in_ntoa(sin.sin_addr.s_addr)));
3228         
3229         /*
3230          *      connect() to INADDR_ANY means loopback (BSD'ism).
3231          */
3232         
3233         if(sin.sin_addr.s_addr==INADDR_ANY)
3234                 sin.sin_addr.s_addr=ip_my_addr();
3235                   
3236         /*
3237          *      Don't want a TCP connection going to a broadcast address 
3238          */
3239 
3240         if (ip_chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) 
3241         { 
3242                 DPRINTF((DBG_TCP, "TCP connection to broadcast address not allowed\n"));
3243                 return(-ENETUNREACH);
3244         }
3245   
3246         /*
3247          *      Connect back to the same socket: Blows up so disallow it 
3248          */
3249 
3250         if(sk->saddr == sin.sin_addr.s_addr && sk->num==ntohs(sin.sin_port))
3251                 return -EBUSY;
3252 
3253         sk->inuse = 1;
3254         sk->daddr = sin.sin_addr.s_addr;
3255         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3256         sk->window_seq = sk->write_seq;
3257         sk->rcv_ack_seq = sk->write_seq -1;
3258         sk->err = 0;
3259         sk->dummy_th.dest = sin.sin_port;
3260         release_sock(sk);
3261 
3262         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3263         if (buff == NULL) 
3264         {
3265                 return(-ENOMEM);
3266         }
3267         sk->inuse = 1;
3268         buff->len = 24;
3269         buff->sk = sk;
3270         buff->free = 1;
3271         buff->localroute = sk->localroute;
3272         
3273         t1 = (struct tcphdr *) buff->data;
3274 
3275         /*
3276          *      Put in the IP header and routing stuff. 
3277          */
3278 
3279         /*
3280          *      We need to build the routing stuff fromt the things saved in skb. 
3281          */
3282 
3283         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3284                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3285         if (tmp < 0) 
3286         {
3287                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3288                 release_sock(sk);
3289                 return(-ENETUNREACH);
3290         }
3291 
3292         buff->len += tmp;
3293         t1 = (struct tcphdr *)((char *)t1 +tmp);
3294 
3295         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3296         t1->seq = ntohl(sk->write_seq++);
3297         sk->sent_seq = sk->write_seq;
3298         buff->h.seq = sk->write_seq;
3299         t1->ack = 0;
3300         t1->window = 2;
3301         t1->res1=0;
3302         t1->res2=0;
3303         t1->rst = 0;
3304         t1->urg = 0;
3305         t1->psh = 0;
3306         t1->syn = 1;
3307         t1->urg_ptr = 0;
3308         t1->doff = 6;
3309         /* use 512 or whatever user asked for */
3310 
3311         if (sk->user_mss)
3312                 sk->mtu = sk->user_mss;
3313         else 
3314         {
3315 #ifdef SUBNETSARELOCAL
3316                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3317 #else
3318                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3319 #endif
3320                         sk->mtu = 576 - HEADER_SIZE;
3321                 else
3322                         sk->mtu = MAX_WINDOW;
3323         }
3324         /*
3325          *      but not bigger than device MTU 
3326          */
3327 
3328         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3329 
3330         /*
3331          *      Put in the TCP options to say MTU. 
3332          */
3333 
3334         ptr = (unsigned char *)(t1+1);
3335         ptr[0] = 2;
3336         ptr[1] = 4;
3337         ptr[2] = (sk->mtu) >> 8;
3338         ptr[3] = (sk->mtu) & 0xff;
3339         tcp_send_check(t1, sk->saddr, sk->daddr,
3340                   sizeof(struct tcphdr) + 4, sk);
3341 
3342         /*
3343          *      This must go first otherwise a really quick response will get reset. 
3344          */
3345 
3346         sk->state = TCP_SYN_SENT;
3347         sk->rtt = TCP_CONNECT_TIME;
3348         reset_timer(sk, TIME_WRITE, TCP_CONNECT_TIME);  /* Timer for repeating the SYN until an answer */
3349         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3350 
3351         sk->prot->queue_xmit(sk, dev, buff, 0);  
3352         tcp_statistics.TcpActiveOpens++;
3353         tcp_statistics.TcpOutSegs++;
3354   
3355         release_sock(sk);
3356         return(0);
3357 }
3358 
3359 
3360 /* This functions checks to see if the tcp header is actually acceptable. */
3361 static int
3362 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3363              struct options *opt, unsigned long saddr, struct device *dev)
3364 {
3365         unsigned long next_seq;
3366 
3367         next_seq = len - 4*th->doff;
3368         if (th->fin)
3369                 next_seq++;
3370         /* if we have a zero window, we can't have any data in the packet.. */
3371         if (next_seq && !sk->window)
3372                 goto ignore_it;
3373         next_seq += th->seq;
3374 
3375         /*
3376          * This isn't quite right.  sk->acked_seq could be more recent
3377          * than sk->window.  This is however close enough.  We will accept
3378          * slightly more packets than we should, but it should not cause
3379          * problems unless someone is trying to forge packets.
3380          */
3381 
3382         /* have we already seen all of this packet? */
3383         if (!after(next_seq+1, sk->acked_seq))
3384                 goto ignore_it;
3385         /* or does it start beyond the window? */
3386         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3387                 goto ignore_it;
3388 
3389         /* ok, at least part of this packet would seem interesting.. */
3390         return 1;
3391 
3392 ignore_it:
3393         DPRINTF((DBG_TCP, "tcp_sequence: rejecting packet.\n"));
3394 
3395         if (th->rst)
3396                 return 0;
3397 
3398         /*
3399          *      Send a reset if we get something not ours and we are
3400          *      unsynchronized. Note: We don't do anything to our end. We
3401          *      are just killing the bogus remote connection then we will
3402          *      connect again and it will work (with luck).
3403          */
3404          
3405         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3406                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3407                 return 1;
3408         }
3409 
3410         /* Try to resync things. */
3411         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3412         return 0;
3413 }
3414 
3415 
3416 int
3417 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3418         unsigned long daddr, unsigned short len,
3419         unsigned long saddr, int redo, struct inet_protocol * protocol)
3420 {
3421   struct tcphdr *th;
3422   struct sock *sk;
3423 
3424   if (!skb) {
3425         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL\n"));
3426         return(0);
3427   }
3428 
3429   if (!dev) 
3430   {
3431         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL\n"));
3432         return(0);
3433   }
3434   
3435   tcp_statistics.TcpInSegs++;
3436   
3437   th = skb->h.th;
3438 
3439   /* Find the socket. */
3440   sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3441   DPRINTF((DBG_TCP, "<<\n"));
3442   DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X\n", len, redo, skb));
3443   
3444   /* If this socket has got a reset its to all intents and purposes 
3445      really dead */
3446   if (sk!=NULL && sk->zapped)
3447         sk=NULL;
3448 
3449   if (sk) {
3450          DPRINTF((DBG_TCP, "sk = %X:\n", sk));
3451   }
3452 
3453   if (!redo) {
3454         if (tcp_check(th, len, saddr, daddr )) {
3455                 skb->sk = NULL;
3456                 DPRINTF((DBG_TCP, "packet dropped with bad checksum.\n"));
3457 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: bad checksum\n");
3458                 kfree_skb(skb,FREE_READ);
3459                 /*
3460                  * We don't release the socket because it was
3461                  * never marked in use.
3462                  */
3463                 return(0);
3464         }
3465 
3466         th->seq = ntohl(th->seq);
3467 
3468         /* See if we know about the socket. */
3469         if (sk == NULL) {
3470                 if (!th->rst)
3471                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3472                 skb->sk = NULL;
3473                 kfree_skb(skb, FREE_READ);
3474                 return(0);
3475         }
3476 
3477         skb->len = len;
3478         skb->sk = sk;
3479         skb->acked = 0;
3480         skb->used = 0;
3481         skb->free = 0;
3482         skb->saddr = daddr;
3483         skb->daddr = saddr;
3484 
3485         /* We may need to add it to the backlog here. */
3486         cli();
3487         if (sk->inuse) {
3488                 skb_queue_head(&sk->back_log, skb);
3489                 sti();
3490                 return(0);
3491         }
3492         sk->inuse = 1;
3493         sti();
3494   } else {
3495         if (!sk) {
3496                 DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1\n"));
3497                 return(0);
3498         }
3499   }
3500 
3501   if (!sk->prot) {
3502         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL \n"));
3503         return(0);
3504   }
3505 
3506   /* Charge the memory to the socket. */
3507   if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
3508         skb->sk = NULL;
3509         DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.\n"));
3510         kfree_skb(skb, FREE_READ);
3511         release_sock(sk);
3512         return(0);
3513   }
3514   sk->rmem_alloc += skb->mem_len;
3515 
3516   DPRINTF((DBG_TCP, "About to do switch.\n"));
3517 
3518   /* Now deal with it. */
3519   switch(sk->state) {
3520         /*
3521          * This should close the system down if it's waiting
3522          * for an ack that is never going to be sent.
3523          */
3524         case TCP_LAST_ACK:
3525                 if (th->rst) {
3526                         sk->zapped=1;
3527                         sk->err = ECONNRESET;
3528                         sk->state = TCP_CLOSE;
3529                         sk->shutdown = SHUTDOWN_MASK;
3530                         if (!sk->dead) {
3531                                 sk->state_change(sk);
3532                         }
3533                         kfree_skb(skb, FREE_READ);
3534                         release_sock(sk);
3535                         return(0);
3536                 }
3537 
3538         case TCP_ESTABLISHED:
3539         case TCP_CLOSE_WAIT:
3540         case TCP_CLOSING:
3541         case TCP_FIN_WAIT1:
3542         case TCP_FIN_WAIT2:
3543         case TCP_TIME_WAIT:
3544                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3545                         if (inet_debug == DBG_SLIP) 
3546                                 printk("\rtcp_rcv: not in seq\n");
3547                         kfree_skb(skb, FREE_READ);
3548                         release_sock(sk);
3549                         return(0);
3550                 }
3551 
3552                 if (th->rst) 
3553                 {
3554                         tcp_statistics.TcpEstabResets++;
3555                         tcp_statistics.TcpCurrEstab--;
3556                         sk->zapped=1;
3557                         /* This means the thing should really be closed. */
3558                         sk->err = ECONNRESET;
3559 
3560                         if (sk->state == TCP_CLOSE_WAIT) 
3561                         {
3562                                 sk->err = EPIPE;
3563                         }
3564 
3565                         /*
3566                          * A reset with a fin just means that
3567                          * the data was not all read.
3568                          */
3569                         sk->state = TCP_CLOSE;
3570                         sk->shutdown = SHUTDOWN_MASK;
3571                         if (!sk->dead) 
3572                         {
3573                                 sk->state_change(sk);
3574                         }
3575                         kfree_skb(skb, FREE_READ);
3576                         release_sock(sk);
3577                         return(0);
3578                 }
3579                 if (th->syn) 
3580                 {
3581                         tcp_statistics.TcpCurrEstab--;
3582                         tcp_statistics.TcpEstabResets++;
3583                         sk->err = ECONNRESET;
3584                         sk->state = TCP_CLOSE;
3585                         sk->shutdown = SHUTDOWN_MASK;
3586                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3587                         if (!sk->dead) {
3588                                 sk->state_change(sk);
3589                         }
3590                         kfree_skb(skb, FREE_READ);
3591                         release_sock(sk);
3592                         return(0);
3593                 }
3594 
3595                 if (th->ack && !tcp_ack(sk, th, saddr, len)) {
3596                         kfree_skb(skb, FREE_READ);
3597                         release_sock(sk);
3598                         return(0);
3599                 }
3600 
3601                 if (tcp_urg(sk, th, saddr, len)) {
3602                         kfree_skb(skb, FREE_READ);
3603                         release_sock(sk);
3604                         return(0);
3605                 }
3606 
3607                 if (tcp_data(skb, sk, saddr, len)) {
3608                         kfree_skb(skb, FREE_READ);
3609                         release_sock(sk);
3610                         return(0);
3611                 }
3612 
3613                 /* Moved: you must do data then fin bit */
3614                 if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
3615                         kfree_skb(skb, FREE_READ);
3616                         release_sock(sk);
3617                         return(0);
3618                 }
3619 
3620                 release_sock(sk);
3621                 return(0);
3622 
3623         case TCP_CLOSE:
3624                 if (sk->dead || sk->daddr) {
3625                         DPRINTF((DBG_TCP, "packet received for closed,dead socket\n"));
3626                         kfree_skb(skb, FREE_READ);
3627                         release_sock(sk);
3628                         return(0);
3629                 }
3630 
3631                 if (!th->rst) {
3632                         if (!th->ack)
3633                                 th->ack_seq = 0;
3634                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3635                 }
3636                 kfree_skb(skb, FREE_READ);
3637                 release_sock(sk);
3638                 return(0);
3639 
3640         case TCP_LISTEN:
3641                 if (th->rst) {
3642                         kfree_skb(skb, FREE_READ);
3643                         release_sock(sk);
3644                         return(0);
3645                 }
3646                 if (th->ack) {
3647                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3648                         kfree_skb(skb, FREE_READ);
3649                         release_sock(sk);
3650                         return(0);
3651                 }
3652 
3653                 if (th->syn) 
3654                 {
3655                         /*
3656                          * Now we just put the whole thing including
3657                          * the header and saddr, and protocol pointer
3658                          * into the buffer.  We can't respond until the
3659                          * user tells us to accept the connection.
3660                          */
3661                         tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
3662                         release_sock(sk);
3663                         return(0);
3664                 }
3665 
3666                 kfree_skb(skb, FREE_READ);
3667                 release_sock(sk);
3668                 return(0);
3669 
3670         case TCP_SYN_RECV:
3671                 if (th->syn) {
3672                         /* Probably a retransmitted syn */
3673                         kfree_skb(skb, FREE_READ);
3674                         release_sock(sk);
3675                         return(0);
3676                 }
3677 
3678 
3679         default:
3680                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3681                 {
3682                         kfree_skb(skb, FREE_READ);
3683                         release_sock(sk);
3684                         return(0);
3685                 }
3686 
3687         case TCP_SYN_SENT:
3688                 if (th->rst) 
3689                 {
3690                         tcp_statistics.TcpAttemptFails++;
3691                         sk->err = ECONNREFUSED;
3692                         sk->state = TCP_CLOSE;
3693                         sk->shutdown = SHUTDOWN_MASK;
3694                         sk->zapped = 1;
3695                         if (!sk->dead) 
3696                         {
3697                                 sk->state_change(sk);
3698                         }
3699                         kfree_skb(skb, FREE_READ);
3700                         release_sock(sk);
3701                         return(0);
3702                 }
3703                 if (!th->ack) 
3704                 {
3705                         if (th->syn) 
3706                         {
3707                                 sk->state = TCP_SYN_RECV;
3708                         }
3709 
3710                         kfree_skb(skb, FREE_READ);
3711                         release_sock(sk);
3712                         return(0);
3713                 }
3714 
3715                 switch(sk->state) 
3716                 {
3717                         case TCP_SYN_SENT:
3718                                 if (!tcp_ack(sk, th, saddr, len)) 
3719                                 {
3720                                         tcp_statistics.TcpAttemptFails++;
3721                                         tcp_reset(daddr, saddr, th,
3722                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3723                                         kfree_skb(skb, FREE_READ);
3724                                         release_sock(sk);
3725                                         return(0);
3726                                 }
3727 
3728                                 /*
3729                                  * If the syn bit is also set, switch to
3730                                  * tcp_syn_recv, and then to established.
3731                                  */
3732                                 if (!th->syn) 
3733                                 {
3734                                         kfree_skb(skb, FREE_READ);
3735                                         release_sock(sk);
3736                                         return(0);
3737                                 }
3738 
3739                                 /* Ack the syn and fall through. */
3740                                 sk->acked_seq = th->seq+1;
3741                                 sk->fin_seq = th->seq;
3742                                 tcp_send_ack(sk->sent_seq, th->seq+1,
3743                                                         sk, th, sk->daddr);
3744         
3745                         case TCP_SYN_RECV:
3746                                 if (!tcp_ack(sk, th, saddr, len)) 
3747                                 {
3748                                         tcp_statistics.TcpAttemptFails++;
3749                                         tcp_reset(daddr, saddr, th,
3750                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
3751                                         kfree_skb(skb, FREE_READ);
3752                                         release_sock(sk);
3753                                         return(0);
3754                                 }
3755 
3756                                 tcp_statistics.TcpCurrEstab++;
3757                                 sk->state = TCP_ESTABLISHED;
3758 
3759                                 /*
3760                                  * Now we need to finish filling out
3761                                  * some of the tcp header.
3762                                  */
3763                                 /* We need to check for mtu info. */
3764                                 tcp_options(sk, th);
3765                                 sk->dummy_th.dest = th->source;
3766                                 sk->copied_seq = sk->acked_seq-1;
3767                                 if (!sk->dead) {
3768                                         sk->state_change(sk);
3769                                 }
3770 
3771                                 /*
3772                                  * We've already processed his first
3773                                  * ack.  In just about all cases that
3774                                  * will have set max_window.  This is
3775                                  * to protect us against the possibility
3776                                  * that the initial window he sent was 0.
3777                                  * This must occur after tcp_options, which
3778                                  * sets sk->mtu.
3779                                  */
3780                                 if (sk->max_window == 0) {
3781                                   sk->max_window = 32;
3782                                   sk->mss = min(sk->max_window, sk->mtu);
3783                                 }
3784 
3785                                 /*
3786                                  * Now process the rest like we were
3787                                  * already in the established state.
3788                                  */
3789                                 if (th->urg) {
3790                                         if (tcp_urg(sk, th, saddr, len)) { 
3791                                                 kfree_skb(skb, FREE_READ);
3792                                                 release_sock(sk);
3793                                                 return(0);
3794                                         }
3795                         }
3796                         if (tcp_data(skb, sk, saddr, len))
3797                                                 kfree_skb(skb, FREE_READ);
3798 
3799                         if (th->fin) tcp_fin(skb, sk, th, saddr, dev);
3800                         release_sock(sk);
3801                         return(0);
3802                 }
3803 
3804                 if (th->urg) {
3805                         if (tcp_urg(sk, th, saddr, len)) {
3806                                 kfree_skb(skb, FREE_READ);
3807                                 release_sock(sk);
3808                                 return(0);
3809                         }
3810                 }
3811 
3812                 if (tcp_data(skb, sk, saddr, len)) {
3813                         kfree_skb(skb, FREE_READ);
3814                         release_sock(sk);
3815                         return(0);
3816                 }
3817 
3818                 if (!th->fin) {
3819                         release_sock(sk);
3820                         return(0);
3821                 }
3822                 tcp_fin(skb, sk, th, saddr, dev);
3823                 release_sock(sk);
3824                 return(0);
3825         }
3826 }
3827 
3828 
3829 /*
3830  * This routine sends a packet with an out of date sequence
3831  * number. It assumes the other end will try to ack it.
3832  */
3833 
3834 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3835 {
3836         struct sk_buff *buff;
3837         struct tcphdr *t1;
3838         struct device *dev=NULL;
3839         int tmp;
3840 
3841         if (sk->zapped)
3842                 return; /* Afer a valid reset we can send no more */
3843 
3844         /*
3845          * Write data can still be transmitted/retransmitted in the
3846          * following states.  If any other state is encountered, return.
3847          */
3848 
3849         if (sk->state != TCP_ESTABLISHED && 
3850             sk->state != TCP_CLOSE_WAIT &&
3851             sk->state != TCP_FIN_WAIT1 && 
3852             sk->state != TCP_LAST_ACK &&
3853             sk->state != TCP_CLOSING
3854         ) {
3855                 return;
3856         }
3857 
3858         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
3859         if (buff == NULL) 
3860                 return;
3861 
3862         buff->len = sizeof(struct tcphdr);
3863         buff->free = 1;
3864         buff->sk = sk;
3865         buff->localroute = sk->localroute;
3866 
3867         DPRINTF((DBG_TCP, "in tcp_write_wakeup\n"));
3868         t1 = (struct tcphdr *) buff->data;
3869 
3870         /* Put in the IP header and routing stuff. */
3871         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3872                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
3873         if (tmp < 0) 
3874         {
3875                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3876                 return;
3877         }
3878 
3879         buff->len += tmp;
3880         t1 = (struct tcphdr *)((char *)t1 +tmp);
3881 
3882         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
3883 
3884         /*
3885          * Use a previous sequence.
3886          * This should cause the other end to send an ack.
3887          */
3888         t1->seq = htonl(sk->sent_seq-1);
3889         t1->ack = 1; 
3890         t1->res1= 0;
3891         t1->res2= 0;
3892         t1->rst = 0;
3893         t1->urg = 0;
3894         t1->psh = 0;
3895         t1->fin = 0;
3896         t1->syn = 0;
3897         t1->ack_seq = ntohl(sk->acked_seq);
3898         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3899         t1->doff = sizeof(*t1)/4;
3900         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
3901 
3902          /*     Send it and free it.
3903           *     This will prevent the timer from automatically being restarted.
3904           */
3905         sk->prot->queue_xmit(sk, dev, buff, 1);
3906         tcp_statistics.TcpOutSegs++;
3907 }
3908 
3909 void
3910 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3911 {
3912         if (sk->zapped)
3913                 return;         /* Afer a valid reset we can send no more */
3914 
3915         tcp_write_wakeup(sk);
3916 
3917         sk->backoff++;
3918         sk->rto = min(sk->rto << 1, 120*HZ);
3919         reset_timer (sk, TIME_PROBE0, sk->rto);
3920         sk->retransmits++;
3921         sk->prot->retransmits ++;
3922 }
3923 
3924 /*
3925  *      Socket option code for TCP. 
3926  */
3927   
3928 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3929 {
3930         int val,err;
3931 
3932         if(level!=SOL_TCP)
3933                 return ip_setsockopt(sk,level,optname,optval,optlen);
3934 
3935         if (optval == NULL) 
3936                 return(-EINVAL);
3937 
3938         err=verify_area(VERIFY_READ, optval, sizeof(int));
3939         if(err)
3940                 return err;
3941         
3942         val = get_fs_long((unsigned long *)optval);
3943 
3944         switch(optname)
3945         {
3946                 case TCP_MAXSEG:
3947 /*                      if(val<200||val>2048 || val>sk->mtu) */
3948 /*
3949  * values greater than interface MTU won't take effect.  however at
3950  * the point when this call is done we typically don't yet know
3951  * which interface is going to be used
3952  */
3953                         if(val<1||val>MAX_WINDOW)
3954                                 return -EINVAL;
3955                         sk->user_mss=val;
3956                         return 0;
3957                 case TCP_NODELAY:
3958                         sk->nonagle=(val==0)?0:1;
3959                         return 0;
3960                 default:
3961                         return(-ENOPROTOOPT);
3962         }
3963 }
3964 
3965 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3966 {
3967         int val,err;
3968 
3969         if(level!=SOL_TCP)
3970                 return ip_getsockopt(sk,level,optname,optval,optlen);
3971                         
3972         switch(optname)
3973         {
3974                 case TCP_MAXSEG:
3975                         val=sk->user_mss;
3976                         break;
3977                 case TCP_NODELAY:
3978                         val=sk->nonagle;        /* Until Johannes stuff is in */
3979                         break;
3980                 default:
3981                         return(-ENOPROTOOPT);
3982         }
3983         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3984         if(err)
3985                 return err;
3986         put_fs_long(sizeof(int),(unsigned long *) optlen);
3987 
3988         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3989         if(err)
3990                 return err;
3991         put_fs_long(val,(unsigned long *)optval);
3992 
3993         return(0);
3994 }       
3995 
3996 
3997 struct proto tcp_prot = {
3998   sock_wmalloc,
3999   sock_rmalloc,
4000   sock_wfree,
4001   sock_rfree,
4002   sock_rspace,
4003   sock_wspace,
4004   tcp_close,
4005   tcp_read,
4006   tcp_write,
4007   tcp_sendto,
4008   tcp_recvfrom,
4009   ip_build_header,
4010   tcp_connect,
4011   tcp_accept,
4012   ip_queue_xmit,
4013   tcp_retransmit,
4014   tcp_write_wakeup,
4015   tcp_read_wakeup,
4016   tcp_rcv,
4017   tcp_select,
4018   tcp_ioctl,
4019   NULL,
4020   tcp_shutdown,
4021   tcp_setsockopt,
4022   tcp_getsockopt,
4023   128,
4024   0,
4025   {NULL,},
4026   "TCP"
4027 };

/* [previous][next][first][last][top][bottom][index][help] */