root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_select_window
  3. tcp_time_wait
  4. tcp_retransmit
  5. tcp_err
  6. tcp_readable
  7. tcp_select
  8. tcp_ioctl
  9. tcp_check
  10. tcp_send_check
  11. tcp_send_skb
  12. tcp_dequeue_partial
  13. tcp_send_partial
  14. tcp_enqueue_partial
  15. tcp_send_ack
  16. tcp_build_header
  17. tcp_write
  18. tcp_sendto
  19. tcp_read_wakeup
  20. cleanup_rbuf
  21. tcp_read_urg
  22. tcp_read
  23. tcp_shutdown
  24. tcp_recvfrom
  25. tcp_reset
  26. tcp_options
  27. default_mask
  28. tcp_conn_request
  29. tcp_close
  30. tcp_write_xmit
  31. sort_send
  32. tcp_ack
  33. tcp_data
  34. tcp_check_urg
  35. tcp_urg
  36. tcp_fin
  37. tcp_accept
  38. tcp_connect
  39. tcp_sequence
  40. tcp_rcv
  41. tcp_write_wakeup
  42. tcp_send_probe0
  43. tcp_setsockopt
  44. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *
  20  * Fixes:       
  21  *              Alan Cox        :       Numerous verify_area() calls
  22  *              Alan Cox        :       Set the ACK bit on a reset
  23  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  24  *                                      and was trying to connect (tcp_err()).
  25  *              Alan Cox        :       All icmp error handling was broken
  26  *                                      pointers passed were wrong and the
  27  *                                      socket was looked up backwards. Nobody
  28  *                                      tested any icmp error code obviously.
  29  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  30  *                                      on errors. select behaves and the icmp error race
  31  *                                      has gone by moving it into sock.c
  32  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  33  *                                      packets for unknown sockets.
  34  *              Alan Cox        :       tcp option processing.
  35  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  36  *              Herp Rosmanith  :       More reset fixes
  37  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  38  *                                      any kind of RST is right out.
  39  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  40  *                                      otherwise odd bits of prattle escape still
  41  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  42  *                                      LAN workplace lockups.
  43  *              Alan Cox        :       Some tidyups using the new skb list facilities
  44  *              Alan Cox        :       sk->keepopen now seems to work
  45  *              Alan Cox        :       Pulls options out correctly on accepts
  46  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  47  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  48  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  49  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  50  *              Alan Cox        :       Removed incorrect check for 20 * psh
  51  *      Michael O'Reilly        :       ack < copied bug fix.
  52  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  53  *              Alan Cox        :       FIN with no memory -> CRASH
  54  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  55  *              Alan Cox        :       Added TCP options (SOL_TCP)
  56  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  57  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  58  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  59  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  60  *              Alan Cox        :       Put in missing check for SYN bit.
  61  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  62  *                                      window non shrink trick.
  63  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  64  *              Charles Hedrick :       TCP fixes
  65  *              Toomas Tamm     :       TCP window fixes
  66  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  67  *              Charles Hedrick :       Rewrote most of it to actually work
  68  *              Linus           :       Rewrote tcp_read() and URG handling
  69  *                                      completely
  70  *              Gerhard Koerting:       Fixed some missing timer handling
  71  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  72  *              Gerhard Koerting:       PC/TCP workarounds
  73  *              Adam Caldwell   :       Assorted timer/timing errors
  74  *
  75  *
  76  * To Fix:
  77  *                      Possibly a problem with accept(). BSD accept never fails after
  78  *              it causes a select. Linux can - given the official select semantics I
  79  *              feel that _really_ its the BSD network programs that are bust (notably
  80  *              inetd, which hangs occasionally because of this).
  81  *                      Protocol closedown badly messed up.
  82  *
  83  *              This program is free software; you can redistribute it and/or
  84  *              modify it under the terms of the GNU General Public License
  85  *              as published by the Free Software Foundation; either version
  86  *              2 of the License, or(at your option) any later version.
  87  *
  88  * Description of States:
  89  *
  90  *      TCP_SYN_SENT            sent a connection request, waiting for ack
  91  *
  92  *      TCP_SYN_RECV            received a connection request, sent ack,
  93  *                              waiting for final ack in three-way handshake.
  94  *
  95  *      TCP_ESTABLISHED         connection established
  96  *
  97  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
  98  *                              transmission of remaining buffered data
  99  *
 100  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 101  *                              to shutdown
 102  *
 103  *      TCP_CLOSING             both sides have shutdown but we still have
 104  *                              data we have to finish sending
 105  *
 106  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 107  *                              closed, can only be entered from FIN_WAIT2
 108  *                              or CLOSING.  Required because the other end
 109  *                              may not have gotten our last ACK causing it
 110  *                              to retransmit the data packet (which we ignore)
 111  *
 112  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 113  *                              us to finish writing our data and to shutdown
 114  *                              (we have to close() to move on to LAST_ACK)
 115  *
 116  *      TCP_LAST_ACK            our side has shutdown after remote has
 117  *                              shutdown.  There may still be data in our
 118  *                              buffer that we have to finish sending
 119  *              
 120  *      TCP_CLOSED              socket is finished
 121  */
 122 #include <linux/types.h>
 123 #include <linux/sched.h>
 124 #include <linux/mm.h>
 125 #include <linux/string.h>
 126 #include <linux/socket.h>
 127 #include <linux/sockios.h>
 128 #include <linux/termios.h>
 129 #include <linux/in.h>
 130 #include <linux/fcntl.h>
 131 #include <linux/inet.h>
 132 #include <linux/netdevice.h>
 133 #include "snmp.h"
 134 #include "ip.h"
 135 #include "protocol.h"
 136 #include "icmp.h"
 137 #include "tcp.h"
 138 #include <linux/skbuff.h>
 139 #include "sock.h"
 140 #include "route.h"
 141 #include <linux/errno.h>
 142 #include <linux/timer.h>
 143 #include <asm/system.h>
 144 #include <asm/segment.h>
 145 #include <linux/mm.h>
 146 
 147 #define SEQ_TICK 3
 148 unsigned long seq_offset;
 149 struct tcp_mib  tcp_statistics;
 150 
 151 
 152 static __inline__ int 
 153 min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 154 {
 155   if (a < b) return(a);
 156   return(b);
 157 }
 158 
 159 
 160 /* This routine picks a TCP windows for a socket based on
 161    the following constraints
 162    
 163    1. The window can never be shrunk once it is offered (RFC 793)
 164    2. We limit memory per socket
 165    
 166    For now we use NET2E3's heuristic of offering half the memory
 167    we have handy. All is not as bad as this seems however because
 168    of two things. Firstly we will bin packets even within the window
 169    in order to get the data we are waiting for into the memory limit.
 170    Secondly we bin common duplicate forms at receive time
 171 
 172    Better heuristics welcome
 173 */
 174    
 175 static int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 176 {
 177         int new_window = sk->prot->rspace(sk);
 178         
 179 /*
 180  * two things are going on here.  First, we don't ever offer a
 181  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 182  * receiver side of SWS as specified in RFC1122.
 183  * Second, we always give them at least the window they
 184  * had before, in order to avoid retracting window.  This
 185  * is technically allowed, but RFC1122 advises against it and
 186  * in practice it causes trouble.
 187  */
 188         if (new_window < min(sk->mss, MAX_WINDOW/2) ||
 189             new_window < sk->window)
 190           return(sk->window);
 191         return(new_window);
 192 }
 193 
 194 /* Enter the time wait state. */
 195 
 196 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 197 {
 198   sk->state = TCP_TIME_WAIT;
 199   sk->shutdown = SHUTDOWN_MASK;
 200   if (!sk->dead)
 201         sk->state_change(sk);
 202   reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 203 }
 204 
 205 /*
 206  *      A timer event has trigger a tcp retransmit timeout. The
 207  *      socket xmit queue is ready and set up to send. Because
 208  *      the ack receive code keeps the queue straight we do
 209  *      nothing clever here.
 210  */
 211 
 212 static void
 213 tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 214 {
 215   if (all) {
 216         ip_retransmit(sk, all);
 217         return;
 218   }
 219 
 220   sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 221   /* sk->ssthresh in theory can be zero.  I guess that's OK */
 222   sk->cong_count = 0;
 223 
 224   sk->cong_window = 1;
 225 
 226   /* Do the actual retransmit. */
 227   ip_retransmit(sk, all);
 228 }
 229 
 230 
 231 /*
 232  * This routine is called by the ICMP module when it gets some
 233  * sort of error condition.  If err < 0 then the socket should
 234  * be closed and the error returned to the user.  If err > 0
 235  * it's just the icmp type << 8 | icmp code.  After adjustment
 236  * header points to the first 8 bytes of the tcp header.  We need
 237  * to find the appropriate port.
 238  */
 239 void
 240 tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 241         unsigned long saddr, struct inet_protocol *protocol)
 242 {
 243   struct tcphdr *th;
 244   struct sock *sk;
 245   struct iphdr *iph=(struct iphdr *)header;
 246   
 247   header+=4*iph->ihl;
 248    
 249 
 250   th =(struct tcphdr *)header;
 251   sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 252 
 253   if (sk == NULL) return;
 254   
 255   if(err<0)
 256   {
 257         sk->err = -err;
 258         sk->error_report(sk);
 259         return;
 260   }
 261 
 262   if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
 263         /*
 264          * FIXME:
 265          * For now we will just trigger a linear backoff.
 266          * The slow start code should cause a real backoff here.
 267          */
 268         if (sk->cong_window > 4) sk->cong_window--;
 269         return;
 270   }
 271 
 272   sk->err = icmp_err_convert[err & 0xff].errno;
 273 
 274   /*
 275    * If we've already connected we will keep trying
 276    * until we time out, or the user gives up.
 277    */
 278   if (icmp_err_convert[err & 0xff].fatal) {
 279         if (sk->state == TCP_SYN_SENT) {
 280                 tcp_statistics.TcpAttemptFails++;
 281                 sk->state = TCP_CLOSE;
 282                 sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 283         }
 284   }
 285   return;
 286 }
 287 
 288 
 289 /*
 290  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 291  *      in the received data queue (ie a frame missing that needs sending to us)
 292  */
 293 
 294 static int
 295 tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 296 {
 297   unsigned long counted;
 298   unsigned long amount;
 299   struct sk_buff *skb;
 300   int sum;
 301   unsigned long flags;
 302 
 303   if(sk && sk->debug)
 304         printk("tcp_readable: %p - ",sk);
 305 
 306   save_flags(flags);
 307   cli();
 308   if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 309   {
 310         restore_flags(flags);
 311         if(sk && sk->debug) 
 312                 printk("empty\n");
 313         return(0);
 314   }
 315   
 316   counted = sk->copied_seq+1;   /* Where we are at the moment */
 317   amount = 0;
 318   
 319   /* Do until a push or until we are out of data. */
 320   do {
 321         if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 322                 break;
 323         sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 324         if (skb->h.th->syn)
 325                 sum++;
 326         if (sum >= 0) {                                 /* Add it up, move on */
 327                 amount += sum;
 328                 if (skb->h.th->syn) amount--;
 329                 counted += sum;
 330         }
 331         if (amount && skb->h.th->psh) break;
 332         skb = skb->next;
 333   } while(skb != (struct sk_buff *)&sk->receive_queue);
 334   if (amount && !sk->urginline && sk->urg_data &&
 335       (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 336         amount--;               /* don't count urg data */
 337   restore_flags(flags);
 338   if(sk->debug)
 339         printk("got %lu bytes.\n",amount);
 340   return(amount);
 341 }
 342 
 343 
 344 /*
 345  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 346  *      listening socket has a receive queue of sockets to accept.
 347  */
 348 
 349 static int
 350 tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 351 {
 352   sk->inuse = 1;
 353   switch(sel_type) {
 354         case SEL_IN:
 355                 if(sk->debug)
 356                         printk("select in");
 357                 select_wait(sk->sleep, wait);
 358                 if(sk->debug)
 359                         printk("-select out");
 360                 if (skb_peek(&sk->receive_queue) != NULL) {
 361                         if (sk->state == TCP_LISTEN || tcp_readable(sk)) {
 362                                 release_sock(sk);
 363                                 if(sk->debug)
 364                                         printk("-select ok data\n");
 365                                 return(1);
 366                         }
 367                 }
 368                 if (sk->err != 0)       /* Receiver error */
 369                 {
 370                         release_sock(sk);
 371                         if(sk->debug)
 372                                 printk("-select ok error");
 373                         return(1);
 374                 }
 375                 if (sk->shutdown & RCV_SHUTDOWN) {
 376                         release_sock(sk);
 377                         if(sk->debug)
 378                                 printk("-select ok down\n");
 379                         return(1);
 380                 } else {
 381                         release_sock(sk);
 382                         if(sk->debug)
 383                                 printk("-select fail\n");
 384                         return(0);
 385                 }
 386         case SEL_OUT:
 387                 select_wait(sk->sleep, wait);
 388                 if (sk->shutdown & SEND_SHUTDOWN) {
 389                         /* FIXME: should this return an error? */
 390                         release_sock(sk);
 391                         return(0);
 392                 }
 393 
 394                 /*
 395                  * FIXME:
 396                  * Hack so it will probably be able to write
 397                  * something if it says it's ok to write.
 398                  */
 399                 if (sk->prot->wspace(sk) >= sk->mss) {
 400                         release_sock(sk);
 401                         /* This should cause connect to work ok. */
 402                         if (sk->state == TCP_SYN_RECV ||
 403                             sk->state == TCP_SYN_SENT) return(0);
 404                         return(1);
 405                 }
 406                 release_sock(sk);
 407                 return(0);
 408         case SEL_EX:
 409                 select_wait(sk->sleep,wait);
 410                 if (sk->err || sk->urg_data) {
 411                         release_sock(sk);
 412                         return(1);
 413                 }
 414                 release_sock(sk);
 415                 return(0);
 416   }
 417 
 418   release_sock(sk);
 419   return(0);
 420 }
 421 
 422 
 423 int
 424 tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 425 {
 426   int err;
 427   switch(cmd) {
 428 
 429         case TIOCINQ:
 430 #ifdef FIXME    /* FIXME: */
 431         case FIONREAD:
 432 #endif
 433                 {
 434                         unsigned long amount;
 435 
 436                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 437 
 438                         sk->inuse = 1;
 439                         amount = tcp_readable(sk);
 440                         release_sock(sk);
 441                         err=verify_area(VERIFY_WRITE,(void *)arg,
 442                                                    sizeof(unsigned long));
 443                         if(err)
 444                                 return err;
 445                         put_fs_long(amount,(unsigned long *)arg);
 446                         return(0);
 447                 }
 448         case SIOCATMARK:
 449                 {
 450                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 451 
 452                         err = verify_area(VERIFY_WRITE,(void *) arg,
 453                                                   sizeof(unsigned long));
 454                         if (err)
 455                                 return err;
 456                         put_fs_long(answ,(int *) arg);
 457                         return(0);
 458                 }
 459         case TIOCOUTQ:
 460                 {
 461                         unsigned long amount;
 462 
 463                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 464                         amount = sk->prot->wspace(sk);
 465                         err=verify_area(VERIFY_WRITE,(void *)arg,
 466                                                    sizeof(unsigned long));
 467                         if(err)
 468                                 return err;
 469                         put_fs_long(amount,(unsigned long *)arg);
 470                         return(0);
 471                 }
 472         default:
 473                 return(-EINVAL);
 474   }
 475 }
 476 
 477 
 478 /* This routine computes a TCP checksum. */
 479 unsigned short
 480 tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 481           unsigned long saddr, unsigned long daddr)
 482 {     
 483   unsigned long sum;
 484    
 485   if (saddr == 0) saddr = ip_my_addr();
 486   __asm__("\t addl %%ecx,%%ebx\n"
 487           "\t adcl %%edx,%%ebx\n"
 488           "\t adcl $0, %%ebx\n"
 489           : "=b"(sum)
 490           : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 491           : "cx","bx","dx" );
 492    
 493   if (len > 3) {
 494         __asm__("\tclc\n"
 495                 "1:\n"
 496                 "\t lodsl\n"
 497                 "\t adcl %%eax, %%ebx\n"
 498                 "\t loop 1b\n"
 499                 "\t adcl $0, %%ebx\n"
 500                 : "=b"(sum) , "=S"(th)
 501                 : "0"(sum), "c"(len/4) ,"1"(th)
 502                 : "ax", "cx", "bx", "si" );
 503   }
 504    
 505   /* Convert from 32 bits to 16 bits. */
 506   __asm__("\t movl %%ebx, %%ecx\n"
 507           "\t shrl $16,%%ecx\n"
 508           "\t addw %%cx, %%bx\n"
 509           "\t adcw $0, %%bx\n"
 510           : "=b"(sum)
 511           : "0"(sum)
 512           : "bx", "cx");
 513    
 514   /* Check for an extra word. */
 515   if ((len & 2) != 0) {
 516         __asm__("\t lodsw\n"
 517                 "\t addw %%ax,%%bx\n"
 518                 "\t adcw $0, %%bx\n"
 519                 : "=b"(sum), "=S"(th)
 520                 : "0"(sum) ,"1"(th)
 521                 : "si", "ax", "bx");
 522   }
 523    
 524   /* Now check for the extra byte. */
 525   if ((len & 1) != 0) {
 526         __asm__("\t lodsb\n"
 527                 "\t movb $0,%%ah\n"
 528                 "\t addw %%ax,%%bx\n"
 529                 "\t adcw $0, %%bx\n"
 530                 : "=b"(sum)
 531                 : "0"(sum) ,"S"(th)
 532                 : "si", "ax", "bx");
 533   }
 534    
 535   /* We only want the bottom 16 bits, but we never cleared the top 16. */
 536   return((~sum) & 0xffff);
 537 }
 538 
 539 
 540 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 541                 unsigned long daddr, int len, struct sock *sk)
 542 {
 543         th->check = 0;
 544         th->check = tcp_check(th, len, saddr, daddr);
 545         return;
 546 }
 547 
 548 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 549 {
 550         int size;
 551         struct tcphdr * th = skb->h.th;
 552 
 553         /* length of packet (not counting length of pre-tcp headers) */
 554         size = skb->len - ((unsigned char *) th - skb->data);
 555 
 556         /* sanity check it.. */
 557         if (size < sizeof(struct tcphdr) || size > skb->len) {
 558                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 559                         skb, skb->data, th, skb->len);
 560                 kfree_skb(skb, FREE_WRITE);
 561                 return;
 562         }
 563 
 564         /* If we have queued a header size packet.. */
 565         if (size == sizeof(struct tcphdr)) {
 566                 /* If its got a syn or fin its notionally included in the size..*/
 567                 if(!th->syn && !th->fin) {
 568                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 569                         kfree_skb(skb,FREE_WRITE);
 570                         return;
 571                 }
 572         }
 573 
 574         tcp_statistics.TcpOutSegs++;  
 575         /* We need to complete and send the packet. */
 576         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 577 
 578         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 579         if (after(skb->h.seq, sk->window_seq) ||
 580             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 581              sk->packets_out >= sk->cong_window) {
 582                 if (skb->next != NULL) {
 583                         printk("tcp_send_partial: next != NULL\n");
 584                         skb_unlink(skb);
 585                 }
 586                 skb_queue_tail(&sk->write_queue, skb);
 587                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 588                     sk->send_head == NULL &&
 589                     sk->ack_backlog == 0)
 590                   reset_timer(sk, TIME_PROBE0, sk->rto);
 591         } else {
 592                 sk->sent_seq = sk->write_seq;
 593                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 594         }
 595 }
 596 
 597 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 598 {
 599         struct sk_buff * skb;
 600         unsigned long flags;
 601 
 602         save_flags(flags);
 603         cli();
 604         skb = sk->partial;
 605         if (skb) {
 606                 sk->partial = NULL;
 607                 del_timer(&sk->partial_timer);
 608         }
 609         restore_flags(flags);
 610         return skb;
 611 }
 612 
 613 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 614 {
 615         struct sk_buff *skb;
 616 
 617         if (sk == NULL)
 618                 return;
 619         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 620                 tcp_send_skb(sk, skb);
 621 }
 622 
 623 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 624 {
 625         struct sk_buff * tmp;
 626         unsigned long flags;
 627 
 628         save_flags(flags);
 629         cli();
 630         tmp = sk->partial;
 631         if (tmp)
 632                 del_timer(&sk->partial_timer);
 633         sk->partial = skb;
 634         sk->partial_timer.expires = HZ;
 635         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 636         sk->partial_timer.data = (unsigned long) sk;
 637         add_timer(&sk->partial_timer);
 638         restore_flags(flags);
 639         if (tmp)
 640                 tcp_send_skb(sk, tmp);
 641 }
 642 
 643 
 644 /* This routine sends an ack and also updates the window. */
 645 static void
 646 tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 647              struct sock *sk,
 648              struct tcphdr *th, unsigned long daddr)
 649 {
 650         struct sk_buff *buff;
 651         struct tcphdr *t1;
 652         struct device *dev = NULL;
 653         int tmp;
 654 
 655         if(sk->zapped)
 656                 return;         /* We have been reset, we may not send again */
 657         /*
 658          * We need to grab some memory, and put together an ack,
 659          * and then put it into the queue to be sent.
 660          */
 661 
 662         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 663         if (buff == NULL) 
 664         {
 665                 /* Force it to send an ack. */
 666                 sk->ack_backlog++;
 667                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 668                 {
 669                         reset_timer(sk, TIME_WRITE, 10);
 670                 }
 671                 return;
 672         }
 673 
 674         buff->len = sizeof(struct tcphdr);
 675         buff->sk = sk;
 676         buff->localroute = sk->localroute;
 677         t1 =(struct tcphdr *) buff->data;
 678 
 679         /* Put in the IP header and routing stuff. */
 680         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 681                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 682         if (tmp < 0) 
 683         {
 684                 buff->free=1;
 685                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 686                 return;
 687         }
 688         buff->len += tmp;
 689         t1 =(struct tcphdr *)((char *)t1 +tmp);
 690 
 691         /* FIXME: */
 692         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 693 
 694         /*
 695          *      Swap the send and the receive. 
 696          */
 697         t1->dest = th->source;
 698         t1->source = th->dest;
 699         t1->seq = ntohl(sequence);
 700         t1->ack = 1;
 701         sk->window = tcp_select_window(sk);
 702         t1->window = ntohs(sk->window);
 703         t1->res1 = 0;
 704         t1->res2 = 0;
 705         t1->rst = 0;
 706         t1->urg = 0;
 707         t1->syn = 0;
 708         t1->psh = 0;
 709         t1->fin = 0;
 710         if (ack == sk->acked_seq) 
 711         {
 712                 sk->ack_backlog = 0;
 713                 sk->bytes_rcv = 0;
 714                 sk->ack_timed = 0;
 715                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 716                                   && sk->timeout == TIME_WRITE) 
 717                 {
 718                         if(sk->keepopen)
 719                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 720                         else
 721                                 delete_timer(sk);
 722                 }
 723         }
 724         t1->ack_seq = ntohl(ack);
 725         t1->doff = sizeof(*t1)/4;
 726         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 727         if (sk->debug)
 728                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 729         tcp_statistics.TcpOutSegs++;
 730         sk->prot->queue_xmit(sk, dev, buff, 1);
 731 }
 732 
 733 
 734 /* This routine builds a generic TCP header. */
 735 static int
 736 tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 737 {
 738 
 739   /* FIXME: want to get rid of this. */
 740   memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 741   th->seq = htonl(sk->write_seq);
 742   th->psh =(push == 0) ? 1 : 0;
 743   th->doff = sizeof(*th)/4;
 744   th->ack = 1;
 745   th->fin = 0;
 746   sk->ack_backlog = 0;
 747   sk->bytes_rcv = 0;
 748   sk->ack_timed = 0;
 749   th->ack_seq = htonl(sk->acked_seq);
 750   sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 751   th->window = htons(sk->window);
 752 
 753   return(sizeof(*th));
 754 }
 755 
 756 /*
 757  * This routine copies from a user buffer into a socket,
 758  * and starts the transmit system.
      *
      *  sk       - socket to write on; marked busy (sk->inuse = 1) on entry
      *             and handed back with release_sock() on the way out.
      *  from     - user-space buffer, read with memcpy_fromfs().
      *  len      - number of bytes the caller wants to send.
      *  nonblock - when set, return instead of sleeping for a connection
      *             or for buffer memory.
      *  flags    - MSG_OOB marks the data urgent (urg bit and urg_ptr are
      *             set); MSG_DONTROUTE is or-ed into skb->localroute.
      *
      * Returns the number of bytes taken from the user buffer.  If some
      * bytes were already taken when an error or a would-block condition
      * shows up, that count is returned instead of the error.  With nothing
      * copied the result is a negative errno: -sk->err (sk->err is then
      * cleared), -EPIPE, -EAGAIN, -ERESTARTSYS, or whatever negative value
      * prot->build_header() returned.
 759  */
 760 static int tcp_write(struct sock *sk, unsigned char *from,
    /* [previous][next][first][last][top][bottom][index][help] */
 761           int len, int nonblock, unsigned flags)
 762 {
 763         int copied = 0;                 /* bytes taken from the user so far */
 764         int copy;                       /* bytes to take in this pass */
 765         int tmp;
 766         struct sk_buff *skb;
 767         struct sk_buff *send_tmp;       /* non-NULL: skb may be parked as a partial packet */
 768         unsigned char *buff;
 769         struct proto *prot;
 770         struct device *dev = NULL;
 771 
 772         sk->inuse=1;
 773         prot = sk->prot;
 774         while(len > 0) 
 775         {
 776                 if (sk->err) 
 777                 {                       /* Stop on an error */
 778                         release_sock(sk);
 779                         if (copied) 
 780                                 return(copied);
 781                         tmp = -sk->err;
 782                         sk->err = 0;
 783                         return(tmp);
 784                 }
 785 
 786         /*
 787          *      First thing we do is make sure that we are established. 
 788          */
 789         
 790                 if (sk->shutdown & SEND_SHUTDOWN) 
 791                 {
 792                         release_sock(sk);
                             /*
                              * sk->err is set after the socket has been released.
                              * It stays EPIPE when a partial count is returned and
                              * is cleared again only when nothing was copied.
                              */
 793                         sk->err = EPIPE;
 794                         if (copied) 
 795                                 return(copied);
 796                         sk->err = 0;
 797                         return(-EPIPE);
 798                 }
 799 
 800 
 801         /* 
 802          *      Wait for a connection to finish.
 803          */
 804         
 805                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 806                 {
 807                         if (sk->err) 
 808                         {
 809                                 release_sock(sk);
 810                                 if (copied) 
 811                                         return(copied);
 812                                 tmp = -sk->err;
 813                                 sk->err = 0;
 814                                 return(tmp);
 815                         }
 816 
                             /*
                              * Only SYN_SENT and SYN_RECV are worth waiting in;
                              * from any other state the write fails with EPIPE.
                              */
 817                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 818                         {
 819                                 release_sock(sk);
 820                                 if (copied) 
 821                                         return(copied);
 822 
 823                                 if (sk->err) 
 824                                 {
 825                                         tmp = -sk->err;
 826                                         sk->err = 0;
 827                                         return(tmp);
 828                                 }
 829 
                                     /* SIGPIPE is raised only when sk->keepopen is set. */
 830                                 if (sk->keepopen) 
 831                                 {
 832                                         send_sig(SIGPIPE, current, 0);
 833                                 }
 834                                 return(-EPIPE);
 835                         }
 836 
 837                         if (nonblock || copied) 
 838                         {
 839                                 release_sock(sk);
 840                                 if (copied) 
 841                                         return(copied);
 842                                 return(-EAGAIN);
 843                         }
 844 
 845                         release_sock(sk);
                             /*
                              * NOTE(review): interrupts are presumably disabled so
                              * that the state test below and the sleep cannot be
                              * separated by a wakeup -- confirm.
                              */
 846                         cli();
 847                 
 848                         if (sk->state != TCP_ESTABLISHED &&
 849                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 850                         {
 851                                 interruptible_sleep_on(sk->sleep);
                                     /* A pending unblocked signal ends the wait; the
                                        socket was already released above. */
 852                                 if (current->signal & ~current->blocked) 
 853                                 {
 854                                         sti();
 855                                         if (copied) 
 856                                                 return(copied);
 857                                         return(-ERESTARTSYS);
 858                                 }
 859                         }
 860                         sk->inuse = 1;
 861                         sti();
 862                 }
 863 
 864         /*
 865          * The following code can result in copy <= 0 if sk->mss is ever
 866          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 867          * sk->mtu is constant once SYN processing is finished.  I.e. we
 868          * had better not get here until we've seen his SYN and at least one
 869          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 870          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 871          * non-decreasing.  Note that any ioctl to set user_mss must be done
 872          * before the exchange of SYN's.  If the initial ack from the other
 873          * end has a window of 0, max_window and thus mss will both be 0.
 874          */
 875 
 876         /* 
 877          *      Now we need to check if we have a half built packet. 
 878          */
 879 
 880                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
 881                 {
 882                         int hdrlen;
 883 
 884                          /* IP header + TCP header */
 885                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 886                                  + sizeof(struct tcphdr);
 887         
 888                         /* Add more stuff to the end of skb->len */
 889                         if (!(flags & MSG_OOB)) 
 890                         {
                                     /* Room left in this segment up to one mss, capped by len. */
 891                                 copy = min(sk->mss - (skb->len - hdrlen), len);
 892                                 /* FIXME: this is really a bug. */
 893                                 if (copy <= 0) 
 894                                 {
 895                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
 896                                         copy = 0;
 897                                 }
 898           
 899                                 memcpy_fromfs(skb->data + skb->len, from, copy);
 900                                 skb->len += copy;
 901                                 from += copy;
 902                                 copied += copy;
 903                                 len -= copy;
 904                                 sk->write_seq += copy;
 905                         }
                             /*
                              * Send now if the segment is full, if this is an OOB
                              * write, or if no packets are outstanding; otherwise
                              * park it again as the partial packet.
                              */
 906                         if ((skb->len - hdrlen) >= sk->mss ||
 907                                 (flags & MSG_OOB) || !sk->packets_out)
 908                                 tcp_send_skb(sk, skb);
 909                         else
 910                                 tcp_enqueue_partial(skb, sk);
 911                         continue;
 912                 }
 913 
 914         /*
 915          * We also need to worry about the window.
 916          * If window < 1/2 the maximum window we've seen from this
 917          *   host, don't use it.  This is sender side
 918          *   silly window prevention, as specified in RFC1122.
 919          *   (Note that this is different than earlier versions of
 920          *   SWS prevention, e.g. RFC813.).  What we actually do is 
 921          *   use the whole MSS.  Since this results in the right
 922          *   edge of the packet being outside the window, it will
 923          *   be queued for later rather than sent.
 924          */
 925 
 926                 copy = sk->window_seq - sk->write_seq;
 927                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
 928                         copy = sk->mss;
 929                 if (copy > len)
 930                         copy = len;
 931 
 932         /*
 933          *      We should really check the window here also. 
 934          */
 935          
 936                 send_tmp = NULL;
                     /*
                      * A short, non-OOB write gets a buffer sized from the mtu
                      * rather than from copy, and is remembered in send_tmp so it
                      * can be queued as a partial packet further down.
                      */
 937                 if (copy < sk->mss && !(flags & MSG_OOB)) 
 938                 {
 939                         /*
 940                          *      We will release the socket in case we sleep here. 
 941                          */
 942                         release_sock(sk);
 943                         /*
 944                          *      NB: following must be mtu, because mss can be increased.
 945                          *      mss is always <= mtu 
 946                          */
 947                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
 948                         sk->inuse = 1;
 949                         send_tmp = skb;
 950                 } 
 951                 else 
 952                 {
 953                         /*
 954                          *      We will release the socket in case we sleep here. 
 955                          */
 956                         release_sock(sk);
 957                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
 958                         sk->inuse = 1;
 959                 }
 960 
 961                 /*
 962                  *      If we didn't get any memory, we need to sleep. 
 963                  */
 964 
 965                 if (skb == NULL) 
 966                 {
 967                         if (nonblock /* || copied */) 
 968                         {
 969                                 release_sock(sk);
 970                                 if (copied) 
 971                                         return(copied);
 972                                 return(-EAGAIN);
 973                         }
 974 
 975                         /*
 976                          *      FIXME: here is another race condition. 
 977                          */
 978 
 979                         tmp = sk->wmem_alloc;
 980                         release_sock(sk);
 981                         cli();
 982                         /*
 983                          *      Again we will try to avoid it. 
                              *      Sleep only if wmem_alloc has not dropped since it
                              *      was sampled above, the connection is still usable
                              *      and no error is pending.
 984                          */
 985                         if (tmp <= sk->wmem_alloc &&
 986                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
 987                                 && sk->err == 0) 
 988                         {
 989                                 interruptible_sleep_on(sk->sleep);
 990                                 if (current->signal & ~current->blocked) 
 991                                 {
 992                                         sti();
 993                                         if (copied) 
 994                                                 return(copied);
 995                                         return(-ERESTARTSYS);
 996                                 }
 997                         }
 998                         sk->inuse = 1;
 999                         sti();
1000                         continue;
1001                 }
1002 
1003                 skb->len = 0;
1004                 skb->sk = sk;
1005                 skb->free = 0;
1006                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1007         
1008                 buff = skb->data;
1009         
1010                 /*
1011                  * FIXME: we need to optimize this.
1012                  * Perhaps some hints here would be good.
1013                  */
1014                 
1015                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1016                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1017                 if (tmp < 0 ) 
1018                 {
1019                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1020                         release_sock(sk);
1021                         if (copied) 
1022                                 return(copied);
1023                         return(tmp);
1024                 }
                     /* tmp is the length of the header just built; the TCP header follows it. */
1025                 skb->len += tmp;
1026                 skb->dev = dev;
1027                 buff += tmp;
1028                 skb->h.th =(struct tcphdr *) buff;
                     /* Third argument is the byte count still left after this
                        segment; tcp_build_header() sets PSH when it is 0. */
1029                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1030                 if (tmp < 0) 
1031                 {
1032                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1033                         release_sock(sk);
1034                         if (copied) 
1035                                 return(copied);
1036                         return(tmp);
1037                 }
1038 
1039                 if (flags & MSG_OOB) 
1040                 {
1041                         ((struct tcphdr *)buff)->urg = 1;
                             /* NOTE(review): this is a host-to-network conversion;
                                htons() would state the intent, the byte swap is
                                the same operation. */
1042                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1043                 }
1044                 skb->len += tmp;
1045                 memcpy_fromfs(buff+tmp, from, copy);
1046 
1047                 from += copy;
1048                 copied += copy;
1049                 len -= copy;
1050                 skb->len += copy;
1051                 skb->free = 0;
1052                 sk->write_seq += copy;
1053         
                     /* A short segment is parked as the partial packet only while
                        other packets are outstanding; otherwise it goes out now. */
1054                 if (send_tmp != NULL && sk->packets_out) 
1055                 {
1056                         tcp_enqueue_partial(send_tmp, sk);
1057                         continue;
1058                 }
1059                 tcp_send_skb(sk, skb);
1060         }
1061         sk->err = 0;
1062 
1063 /*
1064  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1065  *      interactive fast network servers. It's meant to be on and
1066  *      it really improves the throughput though not the echo time
1067  *      on my slow slip link - Alan
1068  */
1069 
1070 /*
1071  *      Avoid possible race on send_tmp - c/o Johannes Stille 
      *      (the test below looks at sk->partial, not at send_tmp).
1072  */
1073  
1074         if(sk->partial && ((!sk->packets_out) 
1075      /* If not nagling we can send on the before case too.. */
1076               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1077         ))
1078                 tcp_send_partial(sk);
1079 
1080         release_sock(sk);
1081         return(copied);
1082 }
1083 
1084 
1085 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1086            int len, int nonblock, unsigned flags,
1087            struct sockaddr_in *addr, int addr_len)
1088 {
1089         struct sockaddr_in sin;
1090 
1091         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1092                 return -EINVAL;
1093         if (addr_len < sizeof(sin)) 
1094                 return(-EINVAL);
1095         memcpy_fromfs(&sin, addr, sizeof(sin));
1096         if (sin.sin_family && sin.sin_family != AF_INET) 
1097                 return(-EINVAL);
1098         if (sin.sin_port != sk->dummy_th.dest) 
1099                 return(-EINVAL);
1100         if (sin.sin_addr.s_addr != sk->daddr) 
1101                 return(-EINVAL);
1102         return(tcp_write(sk, from, len, nonblock, flags));
1103 }
1104 
1105 
1106 static void
1107 tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1108 {
1109         int tmp;
1110         struct device *dev = NULL;
1111         struct tcphdr *t1;
1112         struct sk_buff *buff;
1113 
1114         if (!sk->ack_backlog) 
1115                 return;
1116 
1117         /*
1118          * FIXME: we need to put code here to prevent this routine from
1119          * being called.  Being called once in a while is ok, so only check
1120          * if this is the second time in a row.
1121          */
1122 
1123         /*
1124          * We need to grab some memory, and put together an ack,
1125          * and then put it into the queue to be sent.
1126          */
1127 
1128         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1129         if (buff == NULL) 
1130         {
1131                 /* Try again real soon. */
1132                 reset_timer(sk, TIME_WRITE, 10);
1133                 return;
1134         }
1135 
1136         buff->len = sizeof(struct tcphdr);
1137         buff->sk = sk;
1138         buff->localroute = sk->localroute;
1139         
1140         /*
1141          *      Put in the IP header and routing stuff. 
1142          */
1143 
1144         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1145                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1146         if (tmp < 0) 
1147         {
1148                 buff->free=1;
1149                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1150                 return;
1151         }
1152 
1153         buff->len += tmp;
1154         t1 =(struct tcphdr *)(buff->data +tmp);
1155 
1156         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1157         t1->seq = htonl(sk->sent_seq);
1158         t1->ack = 1;
1159         t1->res1 = 0;
1160         t1->res2 = 0;
1161         t1->rst = 0;
1162         t1->urg = 0;
1163         t1->syn = 0;
1164         t1->psh = 0;
1165         sk->ack_backlog = 0;
1166         sk->bytes_rcv = 0;
1167         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1168         t1->window = ntohs(sk->window);
1169         t1->ack_seq = ntohl(sk->acked_seq);
1170         t1->doff = sizeof(*t1)/4;
1171         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1172         sk->prot->queue_xmit(sk, dev, buff, 1);
1173         tcp_statistics.TcpOutSegs++;
1174 }
1175 
1176 
1177 /*
1178  *      FIXME:
1179  *      This routine frees used buffers.
1180  *      It should consider sending an ACK to let the
1181  *      other end know we now have a bigger window.
1182  */
1183 
1184 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1185 {
1186         unsigned long flags;
1187         int left;
1188         struct sk_buff *skb;
1189 
1190         if(sk->debug)
1191                 printk("cleaning rbuf for sk=%p\n", sk);
1192   
1193         save_flags(flags);
1194         cli();
1195   
1196         left = sk->prot->rspace(sk);
1197  
1198         /*
1199          * We have to loop through all the buffer headers,
1200          * and try to free up all the space we can.
1201          */
1202 
1203         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1204         {
1205                 if (!skb->used) 
1206                         break;
1207                 skb_unlink(skb);
1208                 skb->sk = sk;
1209                 kfree_skb(skb, FREE_READ);
1210         }
1211 
1212         restore_flags(flags);
1213 
1214         /*
1215          * FIXME:
1216          * At this point we should send an ack if the difference
1217          * in the window, and the amount of space is bigger than
1218          * TCP_WINDOW_DIFF.
1219          */
1220 
1221         if(sk->debug)
1222                 printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk),
1223                                             left);
1224         if (sk->prot->rspace(sk) != left) 
1225         {
1226                 /*
1227                  * This area has caused the most trouble.  The current strategy
1228                  * is to simply do nothing if the other end has room to send at
1229                  * least 3 full packets, because the ack from those will auto-
1230                  * matically update the window.  If the other end doesn't think
1231                  * we have much space left, but we have room for atleast 1 more
1232                  * complete packet than it thinks we do, we will send an ack
1233                  * immediatedly.  Otherwise we will wait up to .5 seconds in case
1234                  * the user reads some more.
1235                  */
1236                 sk->ack_backlog++;
1237         /*
1238          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1239          * if the other end is offering a window smaller than the agreed on MSS
1240          * (called sk->mtu here).  In theory there's no connection between send
1241          * and receive, and so no reason to think that they're going to send
1242          * small packets.  For the moment I'm using the hack of reducing the mss
1243          * only on the send side, so I'm putting mtu here.
1244          */
1245 
1246                 if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) 
1247                 {
1248                         /* Send an ack right now. */
1249                         tcp_read_wakeup(sk);
1250                 } 
1251                 else 
1252                 {
1253                         /* Force it to send an ack soon. */
1254                         int was_active = del_timer(&sk->timer);
1255                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1256                         {
1257                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1258                         } 
1259                         else
1260                                 add_timer(&sk->timer);
1261                 }
1262         }
1263 } 
1264 
1265 
1266 /* Handle reading urgent data. */
1267 static int
1268 tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1269              unsigned char *to, int len, unsigned flags)
1270 {
1271         struct wait_queue wait = { current, NULL };
1272 
1273         while (len > 0) {
1274                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1275                         return -EINVAL;
1276                 if (sk->urg_data & URG_VALID) {
1277                         char c = sk->urg_data;
1278                         if (!(flags & MSG_PEEK))
1279                                 sk->urg_data = URG_READ;
1280                         put_fs_byte(c, to);
1281                         return 1;
1282                 }
1283 
1284                 if (sk->err) {
1285                         int tmp = -sk->err;
1286                         sk->err = 0;
1287                         return tmp;
1288                 }
1289 
1290                 if (sk->state == TCP_CLOSE || sk->done) {
1291                         if (!sk->done) {
1292                                 sk->done = 1;
1293                                 return 0;
1294                         }
1295                         return -ENOTCONN;
1296                 }
1297 
1298                 if (sk->shutdown & RCV_SHUTDOWN) {
1299                         sk->done = 1;
1300                         return 0;
1301                 }
1302 
1303                 if (nonblock)
1304                         return -EAGAIN;
1305 
1306                 if (current->signal & ~current->blocked)
1307                         return -ERESTARTSYS;
1308 
1309                 current->state = TASK_INTERRUPTIBLE;
1310                 add_wait_queue(sk->sleep, &wait);
1311                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1312                     !(sk->shutdown & RCV_SHUTDOWN))
1313                         schedule();
1314                 remove_wait_queue(sk->sleep, &wait);
1315                 current->state = TASK_RUNNING;
1316         }
1317         return 0;
1318 }
1319 
1320 
1321 /* This routine copies from a sock struct into the user buffer.
      *
      *  sk       - socket to read from; marked busy (sk->inuse = 1) while
      *             the receive queue is examined and released on exit.
      *  to       - user-space destination, written with memcpy_tofs().
      *  len      - maximum number of bytes to deliver.
      *  nonblock - return -EAGAIN instead of sleeping when no data is ready.
      *  flags    - MSG_OOB hands the whole request to tcp_read_urg();
      *             MSG_PEEK copies data without advancing sk->copied_seq
      *             and without marking buffers used.
      *
      * Returns the number of bytes copied.  The loop stops as soon as it
      * would have to wait after copying something, so a short count is
      * normal.  With nothing copied the result is 0 (socket closed, first
      * time, or receive side shut down) or a negative errno: -ENOTCONN,
      * -sk->err (sk->err is then cleared), -EAGAIN or -ERESTARTSYS.
      */
1322 static int tcp_read(struct sock *sk, unsigned char *to,
    /* [previous][next][first][last][top][bottom][index][help] */
1323         int len, int nonblock, unsigned flags)
1324 {
1325         struct wait_queue wait = { current, NULL };
1326         int copied = 0;
1327         unsigned long peek_seq;         /* private read position for MSG_PEEK */
1328         unsigned long *seq;             /* read position advanced by this call */
1329         unsigned long used;
1330 
1331         /* This error should be checked. */
1332         if (sk->state == TCP_LISTEN)
1333                 return -ENOTCONN;
1334 
1335         /* Urgent data needs to be handled specially. */
1336         if (flags & MSG_OOB)
1337                 return tcp_read_urg(sk, nonblock, to, len, flags);
1338 
             /* seq points at sk->copied_seq for a real read, or at the
                local copy peek_seq so a peek leaves the socket untouched. */
1339         peek_seq = sk->copied_seq;
1340         seq = &sk->copied_seq;
1341         if (flags & MSG_PEEK)
1342                 seq = &peek_seq;
1343 
1344         add_wait_queue(sk->sleep, &wait);
1345         sk->inuse = 1;
1346         while (len > 0) {
1347                 struct sk_buff * skb;
1348                 unsigned long offset;
1349         
1350                 /*
1351                  * are we at urgent data? Stop if we have read anything.
1352                  */
1353                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1354                         break;
1355 
                     /* Set before the queue is scanned, so the schedule()
                        below is reached already marked interruptible. */
1356                 current->state = TASK_INTERRUPTIBLE;
1357 
                     /*
                      * Walk the receive queue from its head looking for the
                      * buffer that holds sequence number 1+*seq.  The walk
                      * ends when the queue is empty, when a buffer starts
                      * after that sequence number, or when the list wraps
                      * back to the queue head.
                      */
1358                 skb = skb_peek(&sk->receive_queue);
1359                 do {
1360                         if (!skb)
1361                                 break;
1362                         if (before(1+*seq, skb->h.th->seq))
1363                                 break;
                             /* Position of the wanted byte inside this buffer;
                                one less when the segment has its syn bit set. */
1364                         offset = 1 + *seq - skb->h.th->seq;
1365                         if (skb->h.th->syn)
1366                                 offset--;
1367                         if (offset < skb->len)
1368                                 goto found_ok_skb;
                             /* Nothing left in this buffer: mark it used
                                (unless peeking) and try the next one. */
1369                         if (!(flags & MSG_PEEK))
1370                                 skb->used = 1;
1371                         skb = skb->next;
1372                 } while (skb != (struct sk_buff *)&sk->receive_queue);
1373 
                     /* No data ready.  Return what we have, if anything ... */
1374                 if (copied)
1375                         break;
1376 
                     /* ... otherwise report the reason, or wait. */
1377                 if (sk->err) {
1378                         copied = -sk->err;
1379                         sk->err = 0;
1380                         break;
1381                 }
1382 
                     /* Closed socket: 0 the first time (sk->done gets set),
                        -ENOTCONN on later calls. */
1383                 if (sk->state == TCP_CLOSE) {
1384                         if (!sk->done) {
1385                                 sk->done = 1;
1386                                 break;
1387                         }
1388                         copied = -ENOTCONN;
1389                         break;
1390                 }
1391 
1392                 if (sk->shutdown & RCV_SHUTDOWN) {
1393                         sk->done = 1;
1394                         break;
1395                 }
1396                         
1397                 if (nonblock) {
1398                         copied = -EAGAIN;
1399                         break;
1400                 }
1401 
                     /* Free consumed buffers and release the socket before
                        sleeping; take it back after waking up. */
1402                 cleanup_rbuf(sk);
1403                 release_sock(sk);
1404                 schedule();
1405                 sk->inuse = 1;
1406 
                     /* copied is 0 on this path, so it can carry the error. */
1407                 if (current->signal & ~current->blocked) {
1408                         copied = -ERESTARTSYS;
1409                         break;
1410                 }
1411                 continue;
1412 
1413         found_ok_skb:
1414                 /* Ok so how much can we use ? */
1415                 used = skb->len - offset;
1416                 if (len < used)
1417                         used = len;
1418                 /* do we have urgent data here? */
1419                 if (sk->urg_data) {
1420                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1421                         if (urg_offset < used) {
                                     /*
                                      * The urgent byte lies inside the range
                                      * about to be copied.  If it is the very
                                      * next byte and is not delivered inline,
                                      * step over it; if it is further on, copy
                                      * only up to it.
                                      */
1422                                 if (!urg_offset) {
1423                                         if (!sk->urginline) {
1424                                                 ++*seq;
1425                                                 offset++;
1426                                                 used--;
1427                                         }
1428                                 } else
1429                                         used = urg_offset;
1430                         }
1431                 }
1432                 /* Copy it: the data starts doff*4 bytes past the TCP header. */
1433                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1434                         skb->h.th->doff*4 + offset, used);
1435                 copied += used;
1436                 len -= used;
1437                 to += used;
1438                 *seq += used;
                     /* Tested against sk->copied_seq rather than *seq, so a
                        peek never clears the urgent state. */
1439                 if (after(sk->copied_seq+1,sk->urg_seq))
1440                         sk->urg_data = 0;
                     /* Buffer fully consumed by a real read: mark it used so
                        that cleanup_rbuf() can free it. */
1441                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1442                         skb->used = 1;
1443         }
1444         remove_wait_queue(sk->sleep, &wait);
1445         current->state = TASK_RUNNING;
1446 
1447         /* Clean up data we have read: This will do ACK frames */
1448         cleanup_rbuf(sk);
1449         release_sock(sk);
1450         return copied;
1451 }
1452 
1453  
1454 /*
1455  * Shutdown the sending side of a connection.
1456  */
1457 
1458 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1459 {
1460         struct sk_buff *buff;
1461         struct tcphdr *t1, *th;
1462         struct proto *prot;
1463         int tmp;
1464         struct device *dev = NULL;
1465 
1466         /*
1467          * We need to grab some memory, and put together a FIN,
1468          * and then put it into the queue to be sent.
1469          * FIXME:
1470          *
1471          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1472          *      Most of this is guesswork, so maybe it will work...
1473          */
1474 
1475         if (!(how & SEND_SHUTDOWN)) 
1476                 return;
1477          
1478         /*
1479          *      If we've already sent a FIN, return. 
1480          */
1481          
1482         if (sk->state == TCP_FIN_WAIT1 ||
1483             sk->state == TCP_FIN_WAIT2 ||
1484             sk->state == TCP_CLOSING ||
1485             sk->state == TCP_LAST_ACK ||
1486             sk->state == TCP_TIME_WAIT
1487         ) {
1488                 return;
1489         }
1490         sk->inuse = 1;
1491 
1492         /*
1493          * flag that the sender has shutdown
1494          */
1495 
1496         sk->shutdown |= SEND_SHUTDOWN;
1497 
1498         /*
1499          *  Clear out any half completed packets. 
1500          */
1501 
1502         if (sk->partial)
1503                 tcp_send_partial(sk);
1504 
1505         prot =(struct proto *)sk->prot;
1506         th =(struct tcphdr *)&sk->dummy_th;
1507         release_sock(sk); /* incase the malloc sleeps. */
1508         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1509         if (buff == NULL)
1510                 return;
1511         sk->inuse = 1;
1512 
1513         buff->sk = sk;
1514         buff->len = sizeof(*t1);
1515         buff->localroute = sk->localroute;
1516         t1 =(struct tcphdr *) buff->data;
1517 
1518         /*
1519          *      Put in the IP header and routing stuff. 
1520          */
1521 
1522         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1523                            IPPROTO_TCP, sk->opt,
1524                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1525         if (tmp < 0) 
1526         {
1527                 /*
1528                  *      Finish anyway, treat this as a send that got lost. 
1529                  *
1530                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1531                  *      written data to be completely acknowledged along
1532                  *      with an acknowledge to our FIN.
1533                  *
1534                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1535                  *      connection established.
1536                  */
1537                 buff->free=1;
1538                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1539 
1540                 if (sk->state == TCP_ESTABLISHED)
1541                         sk->state = TCP_FIN_WAIT1;
1542                 else if(sk->state == TCP_CLOSE_WAIT)
1543                         sk->state = TCP_LAST_ACK;
1544                 else
1545                         sk->state = TCP_FIN_WAIT2;
1546 
1547                 release_sock(sk);
1548                 return;
1549         }
1550 
1551         t1 =(struct tcphdr *)((char *)t1 +tmp);
1552         buff->len += tmp;
1553         buff->dev = dev;
1554         memcpy(t1, th, sizeof(*t1));
1555         t1->seq = ntohl(sk->write_seq);
1556         sk->write_seq++;
1557         buff->h.seq = sk->write_seq;
1558         t1->ack = 1;
1559         t1->ack_seq = ntohl(sk->acked_seq);
1560         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1561         t1->fin = 1;
1562         t1->rst = 0;
1563         t1->doff = sizeof(*t1)/4;
1564         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1565 
1566         /*
1567          * Can't just queue this up.
1568          * It should go at the end of the write queue.
1569          */
1570         
1571         if (skb_peek(&sk->write_queue) != NULL) 
1572         {
1573                 buff->free=0;
1574                 if (buff->next != NULL) 
1575                 {
1576                         printk("tcp_shutdown: next != NULL\n");
1577                         skb_unlink(buff);
1578                 }
1579                 skb_queue_tail(&sk->write_queue, buff);
1580         } 
1581         else 
1582         {
1583                 sk->sent_seq = sk->write_seq;
1584                 sk->prot->queue_xmit(sk, dev, buff, 0);
1585         }
1586 
1587         if (sk->state == TCP_ESTABLISHED) 
1588                 sk->state = TCP_FIN_WAIT1;
1589         else if (sk->state == TCP_CLOSE_WAIT)
1590                 sk->state = TCP_LAST_ACK;
1591         else
1592                 sk->state = TCP_FIN_WAIT2;
1593 
1594         release_sock(sk);
1595 }
1596 
1597 
1598 static int
1599 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1600              int to_len, int nonblock, unsigned flags,
1601              struct sockaddr_in *addr, int *addr_len)
1602 {
1603   struct sockaddr_in sin;
1604   int len;
1605   int err;
1606   int result;
1607   
1608   /* Have to check these first unlike the old code. If 
1609      we check them after we lose data on an error
1610      which is wrong */
1611   err = verify_area(VERIFY_WRITE,addr_len,sizeof(long));
1612   if(err)
1613         return err;
1614   len = get_fs_long(addr_len);
1615   if(len > sizeof(sin))
1616         len = sizeof(sin);
1617   err=verify_area(VERIFY_WRITE, addr, len);  
1618   if(err)
1619         return err;
1620         
1621   result=tcp_read(sk, to, to_len, nonblock, flags);
1622 
1623   if (result < 0) return(result);
1624   
1625   sin.sin_family = AF_INET;
1626   sin.sin_port = sk->dummy_th.dest;
1627   sin.sin_addr.s_addr = sk->daddr;
1628 
1629   memcpy_tofs(addr, &sin, len);
1630   put_fs_long(len, addr_len);
1631   return(result);
1632 }
1633 
1634 
1635 /*
1636  *      This routine will send an RST to the other tcp. 
1637  */
1638  
1639 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1640           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1641 {
1642         struct sk_buff *buff;
1643         struct tcphdr *t1;
1644         int tmp;
1645         struct device *ndev=NULL;
1646   
1647 /*
1648  * We need to grab some memory, and put together an RST,
1649  * and then put it into the queue to be sent.
1650  */
1651 
1652         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1653         if (buff == NULL) 
1654                 return;
1655 
1656         buff->len = sizeof(*t1);
1657         buff->sk = NULL;
1658         buff->dev = dev;
1659         buff->localroute = 0;
1660 
1661         t1 =(struct tcphdr *) buff->data;
1662 
1663         /*
1664          *      Put in the IP header and routing stuff. 
1665          */
1666 
1667         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1668                            sizeof(struct tcphdr),tos,ttl);
1669         if (tmp < 0) 
1670         {
1671                 buff->free = 1;
1672                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1673                 return;
1674         }
1675 
1676         t1 =(struct tcphdr *)((char *)t1 +tmp);
1677         buff->len += tmp;
1678         memcpy(t1, th, sizeof(*t1));
1679 
1680         /*
1681          *      Swap the send and the receive. 
1682          */
1683 
1684         t1->dest = th->source;
1685         t1->source = th->dest;
1686         t1->rst = 1;  
1687         t1->window = 0;
1688   
1689         if(th->ack)
1690         {
1691                 t1->ack = 0;
1692                 t1->seq = th->ack_seq;
1693                 t1->ack_seq = 0;
1694         }
1695         else
1696         {
1697                 t1->ack = 1;
1698                 if(!th->syn)
1699                         t1->ack_seq=htonl(th->seq);
1700                 else
1701                         t1->ack_seq=htonl(th->seq+1);
1702                 t1->seq=0;
1703         }
1704 
1705         t1->syn = 0;
1706         t1->urg = 0;
1707         t1->fin = 0;
1708         t1->psh = 0;
1709         t1->doff = sizeof(*t1)/4;
1710         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1711         prot->queue_xmit(NULL, dev, buff, 1);
1712         tcp_statistics.TcpOutSegs++;
1713 }
1714 
1715 
1716 /*
1717  *      Look for tcp options. Parses everything but only knows about MSS.
1718  *      This routine is always called with the packet containing the SYN.
1719  *      However it may also be called with the ack to the SYN.  So you
1720  *      can't assume this is always the SYN.  It's always called after
1721  *      we have set up sk->mtu to our own MTU.
1722  */
1723  
1724 static void
1725 tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1726 {
1727   unsigned char *ptr;
1728   int length=(th->doff*4)-sizeof(struct tcphdr);
1729   int mss_seen = 0;
1730     
1731   ptr = (unsigned char *)(th + 1);
1732   
1733   while(length>0)
1734   {
1735         int opcode=*ptr++;
1736         int opsize=*ptr++;
1737         switch(opcode)
1738         {
1739                 case TCPOPT_EOL:
1740                         return;
1741                 case TCPOPT_NOP:
1742                         length-=2;
1743                         continue;
1744                 
1745                 default:
1746                         if(opsize<=2)   /* Avoid silly options looping forever */
1747                                 return;
1748                         switch(opcode)
1749                         {
1750                                 case TCPOPT_MSS:
1751                                         if(opsize==4 && th->syn)
1752                                         {
1753                                                 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1754                                                 mss_seen = 1;
1755                                         }
1756                                         break;
1757                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1758                         }
1759                         ptr+=opsize-2;
1760                         length-=opsize;
1761         }
1762   }
1763   if (th->syn) {
1764     if (! mss_seen)
1765       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1766   }
1767 #ifdef CONFIG_INET_PCTCP
1768   sk->mss = min(sk->max_window >> 1, sk->mtu);
1769 #else    
1770   sk->mss = min(sk->max_window, sk->mtu);
1771 #endif  
1772 }
1773 
/*
 *	Return the classful network mask for the address dst.
 *
 *	dst is converted with ntohl() before the class tests and the chosen
 *	mask is converted back with htonl(), so both the argument and the
 *	result are in network byte order.  Anything that is neither class A
 *	nor class B gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_addr = ntohl(dst);
	unsigned long net_mask;

	if (IN_CLASSA(host_addr))
		net_mask = IN_CLASSA_NET;
	else if (IN_CLASSB(host_addr))
		net_mask = IN_CLASSB_NET;
	else
		net_mask = IN_CLASSC_NET;

	return htonl(net_mask);
}
1783 
1784 /*
1785  * This routine handles a connection request.
1786  * It should make sure we haven't already responded.
1787  * Because of the way BSD works, we have to send a syn/ack now.
1788  * This also means it will be harder to close a socket which is
1789  * listening.
1790  */
1791 static void
1792 tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1793                  unsigned long daddr, unsigned long saddr,
1794                  struct options *opt, struct device *dev)
1795 {
       /*
        * Handle the connection request carried in skb for the listening
        * socket sk.  A new struct sock is allocated as a copy of sk and
        * re-initialised, a SYN+ACK carrying an MSS option is built and
        * handed to queue_xmit(), and skb is put on sk->receive_queue with
        * skb->sk pointing at the new socket.
        *
        * saddr is stored as the new socket's daddr and daddr as its saddr.
        * opt is only used for the reset sent when sk is dead; dev supplies
        * the MTU limit and is the device the reply is queued on.
        *
        * Every failure path frees skb and increments TcpAttemptFails.
        */
1796   struct sk_buff *buff;
1797   struct tcphdr *t1;
1798   unsigned char *ptr;
1799   struct sock *newsk;
1800   struct tcphdr *th;
1801   struct device *ndev=NULL;
1802   int tmp;
1803   struct rtable *rt;
1804   
1805   th = skb->h.th;
1806 
1807   /* If the socket is dead, don't accept the connection. */
1808   if (!sk->dead) {
         /* Live listener: notify it through its data_ready callback. */
1809         sk->data_ready(sk,0);
1810   } else {
1811         tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1812         tcp_statistics.TcpAttemptFails++;
1813         kfree_skb(skb, FREE_READ);
1814         return;
1815   }
1816 
1817   /*
1818    * Make sure we can accept more.  This will prevent a
1819    * flurry of syns from eating up all our memory.
1820    */
       /* The request is dropped without sending a reset on this path. */
1821   if (sk->ack_backlog >= sk->max_ack_backlog) {
1822         tcp_statistics.TcpAttemptFails++;
1823         kfree_skb(skb, FREE_READ);
1824         return;
1825   }
1826 
1827   /*
1828    * We need to build a new sock struct.
1829    * It is sort of bad to have a socket without an inode attached
1830    * to it, but the wake_up's will just wake up the listening socket,
1831    * and if the listening socket is destroyed before this is taken
1832    * off of the queue, this will take care of it.
1833    */
1834   newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1835   if (newsk == NULL) {
1836         /* just ignore the syn.  It will get retransmitted. */
1837         tcp_statistics.TcpAttemptFails++;
1838         kfree_skb(skb, FREE_READ);
1839         return;
1840   }
1841 
       /*
        * Start from a byte copy of the listener, then reset the queues,
        * timers, counters and sequence state that must not be shared.
        */
1842   memcpy(newsk, sk, sizeof(*newsk));
1843   skb_queue_head_init(&newsk->write_queue);
1844   skb_queue_head_init(&newsk->receive_queue);
1845   newsk->send_head = NULL;
1846   newsk->send_tail = NULL;
1847   skb_queue_head_init(&newsk->back_log);
1848   newsk->rtt = 0;               /*TCP_CONNECT_TIME<<3*/
1849   newsk->rto = TCP_TIMEOUT_INIT;
1850   newsk->mdev = 0;
1851   newsk->max_window = 0;
1852   newsk->cong_window = 1;
1853   newsk->cong_count = 0;
1854   newsk->ssthresh = 0;
1855   newsk->backoff = 0;
1856   newsk->blog = 0;
1857   newsk->intr = 0;
1858   newsk->proc = 0;
1859   newsk->done = 0;
1860   newsk->partial = NULL;
1861   newsk->pair = NULL;
1862   newsk->wmem_alloc = 0;
1863   newsk->rmem_alloc = 0;
1864   newsk->localroute = sk->localroute;
1865 
1866   newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1867 
1868   newsk->err = 0;
1869   newsk->shutdown = 0;
1870   newsk->ack_backlog = 0;
1871   newsk->acked_seq = skb->h.th->seq+1;
1872   newsk->fin_seq = skb->h.th->seq;
1873   newsk->copied_seq = skb->h.th->seq;
1874   newsk->state = TCP_SYN_RECV;
1875   newsk->timeout = 0;
       /* Our initial sequence number is derived from the jiffies clock. */
1876   newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
1877   newsk->window_seq = newsk->write_seq;
1878   newsk->rcv_ack_seq = newsk->write_seq;
1879   newsk->urg_data = 0;
1880   newsk->retransmits = 0;
1881   newsk->destroy = 0;
1882   newsk->timer.data = (unsigned long)newsk;
1883   newsk->timer.function = &net_timer;
       /* Ports are swapped relative to the incoming header. */
1884   newsk->dummy_th.source = skb->h.th->dest;
1885   newsk->dummy_th.dest = skb->h.th->source;
1886 
1887   /* Swap these two, they are from our point of view. */
1888   newsk->daddr = saddr;
1889   newsk->saddr = daddr;
1890 
1891   put_sock(newsk->num,newsk);
1892   newsk->dummy_th.res1 = 0;
1893   newsk->dummy_th.doff = 6;
1894   newsk->dummy_th.fin = 0;
1895   newsk->dummy_th.syn = 0;
1896   newsk->dummy_th.rst = 0;
1897   newsk->dummy_th.psh = 0;
1898   newsk->dummy_th.ack = 0;
1899   newsk->dummy_th.urg = 0;
1900   newsk->dummy_th.res2 = 0;
       /* These two repeat assignments already made earlier in this function. */
1901   newsk->acked_seq = skb->h.th->seq + 1;
1902   newsk->copied_seq = skb->h.th->seq;
1903 
1904   /* Grab the ttl and tos values and use them */
1905   newsk->ip_ttl=sk->ip_ttl;
1906   newsk->ip_tos=skb->ip_hdr->tos;
1907 
1908 /* use 512 or whatever user asked for */
1909 /* note use of sk->user_mss, since user has no direct access to newsk */
       /*
        * Order of preference for newsk->mtu: the listener's user_mss, then
        * the route's MTU (when the route has RTF_MTU set) less HEADER_SIZE,
        * then 576 - HEADER_SIZE when saddr and daddr differ under the mask,
        * otherwise MAX_WINDOW.
        */
1910   rt=ip_rt_route(saddr, NULL,NULL);
1911   if (sk->user_mss)
1912     newsk->mtu = sk->user_mss;
1913   else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
1914     newsk->mtu = rt->rt_mtu - HEADER_SIZE;
1915   else {
1916 #ifdef CONFIG_INET_SNARL        /* Sub Nets ARe Local */
1917     if ((saddr ^ daddr) & default_mask(saddr))
1918 #else
1919     if ((saddr ^ daddr) & dev->pa_mask)
1920 #endif
1921       newsk->mtu = 576 - HEADER_SIZE;
1922     else
1923       newsk->mtu = MAX_WINDOW;
1924   }
1925 /* but not bigger than device MTU */
1926   newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
1927 
1928 /* this will min with what arrived in the packet */
1929   tcp_options(newsk,skb->h.th);
1930 
1931   buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
1932   if (buff == NULL) {
         /* The error is recorded on the listening socket sk, not on newsk. */
1933         sk->err = -ENOMEM;
1934         newsk->dead = 1;
1935         release_sock(newsk);
1936         kfree_skb(skb, FREE_READ);
1937         tcp_statistics.TcpAttemptFails++;
1938         return;
1939   }
1940   
       /* TCP header plus four bytes for the MSS option written below. */
1941   buff->len = sizeof(struct tcphdr)+4;
1942   buff->sk = newsk;
1943   buff->localroute = newsk->localroute;
1944     
1945   t1 =(struct tcphdr *) buff->data;
1946 
1947   /* Put in the IP header and routing stuff. */
       /*
        * NOTE(review): this passes the listener's sk->ip_tos, although
        * newsk->ip_tos was just taken from the incoming IP header above.
        * Confirm which of the two is intended.
        */
1948   tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
1949                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
1950 
1951   /* Something went wrong. */
1952   if (tmp < 0) {
1953         sk->err = tmp;
1954         buff->free=1;
1955         kfree_skb(buff,FREE_WRITE);
1956         newsk->dead = 1;
1957         release_sock(newsk);
1958         skb->sk = sk;
1959         kfree_skb(skb, FREE_READ);
1960         tcp_statistics.TcpAttemptFails++;
1961         return;
1962   }
1963 
1964   buff->len += tmp;
1965   t1 =(struct tcphdr *)((char *)t1 +tmp);
1966   
       /* Begin with a copy of the incoming header and overwrite fields. */
1967   memcpy(t1, skb->h.th, sizeof(*t1));
1968   buff->h.seq = newsk->write_seq;
1969 
1970   /* Swap the send and the receive. */
1971   t1->dest = skb->h.th->source;
1972   t1->source = newsk->dummy_th.source;
       /* write_seq is post-incremented: the reply carries the old value. */
1973   t1->seq = ntohl(newsk->write_seq++);
1974   t1->ack = 1;
1975   newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
1976   newsk->sent_seq = newsk->write_seq;
1977   t1->window = ntohs(newsk->window);
1978   t1->res1 = 0;
1979   t1->res2 = 0;
1980   t1->rst = 0;
1981   t1->urg = 0;
1982   t1->psh = 0;
1983   t1->syn = 1;
1984   t1->ack_seq = ntohl(skb->h.th->seq+1);
       /* Header length in 32-bit words: base header plus one option word. */
1985   t1->doff = sizeof(*t1)/4+1;
1986 
       /*
        * Option bytes right after the header: kind 2, length 4, then
        * newsk->mtu as a 16-bit value with the high byte first.
        */
1987   ptr =(unsigned char *)(t1+1);
1988   ptr[0] = 2;
1989   ptr[1] = 4;
1990   ptr[2] = ((newsk->mtu) >> 8) & 0xff;
1991   ptr[3] =(newsk->mtu) & 0xff;
1992 
1993   tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
1994   newsk->prot->queue_xmit(newsk, dev, buff, 0);
1995 
1996   reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
1997   skb->sk = newsk;
1998 
1999   /* Charge the sock_buff to newsk. */
2000   sk->rmem_alloc -= skb->mem_len;
2001   newsk->rmem_alloc += skb->mem_len;
2002 
       /*
        * The request is queued on the listener, not on newsk.
        * NOTE(review): presumably tcp_accept() takes it off again - confirm.
        */
2003   skb_queue_tail(&sk->receive_queue,skb);
2004   sk->ack_backlog++;
2005   release_sock(newsk);
2006   tcp_statistics.TcpOutSegs++;
2007 }
2008 
2009 
2010 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2011 {
             /*
              * Close the connection on sk.  Both directions are marked shut
              * down, everything on the receive queue is discarded, a pending
              * partial packet is pushed out, and the rest depends on
              * sk->state.  For CLOSE_WAIT, ESTABLISHED, SYN_SENT and SYN_RECV
              * a FIN segment is built and either transmitted at once or
              * appended to the write queue.
              *
              * timeout: when non-zero, FIN_WAIT1/FIN_WAIT2/CLOSING go through
              * tcp_time_wait(), and TIME_WAIT/LAST_ACK are moved to TCP_CLOSE.
              *
              * sk->inuse is set on entry and release_sock(sk) is called once
              * on every path out of the function.
              */
2012         struct sk_buff *buff;
2013         int need_reset = 0;
2014         struct tcphdr *t1, *th;
2015         struct proto *prot;
2016         struct device *dev=NULL;
2017         int tmp;
2018 
2019         /*
2020          * We need to grab some memory, and put together a FIN, 
2021          * and then put it into the queue to be sent.
2022          */
2023         sk->inuse = 1;
2024         sk->keepopen = 1;
2025         sk->shutdown = SHUTDOWN_MASK;
2026 
2027         if (!sk->dead) 
2028                 sk->state_change(sk);
2029 
2030         /*
2031          *      We need to flush the recv. buffs. 
2032          */
2033 
2034         if (skb_peek(&sk->receive_queue) != NULL) 
2035         {
2036                 struct sk_buff *skb;
2037                 if(sk->debug)
2038                         printk("Clean rcv queue\n");
2039                 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2040                 {
                             /*
                              * need_reset is set when a discarded buffer holds
                              * data that extends past sk->copied_seq; it is
                              * later copied into the rst bit of the segment.
                              */
2041                         if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
2042                                 need_reset = 1;
2043                         kfree_skb(skb, FREE_READ);
2044                 }
2045                 if(sk->debug)
2046                         printk("Cleaned.\n");
2047         }
2048 
2049         /*
2050          *      Get rid off any half-completed packets. 
2051          */
2052          
2053         if (sk->partial) 
2054         {
2055                 tcp_send_partial(sk);
2056         }
2057 
2058         switch(sk->state) 
2059         {
2060                 case TCP_FIN_WAIT1:
2061                 case TCP_FIN_WAIT2:
2062                 case TCP_CLOSING:
2063                         /*
2064                          * These states occur when we have already closed out
2065                          * our end.  If there is no timeout, we do not do
2066                          * anything.  We may still be in the middle of sending
2067                          * the remainder of our buffer, for example...
2068                          * resetting the timer would be inappropriate.
2069                          *
2070                          * XXX if retransmit count reaches limit, is tcp_close()
2071                          * called with timeout == 1 ? if not, we need to fix that.
2072                          */
2073 #ifdef NOTDEF
2074                         /* 
2075                          *      Start a timer.
2076                          * original code was 4 * sk->rtt.  In converting to the
2077                          * new rtt representation, we can't quite use that.
2078                          * it seems to make most sense to  use the backed off value
2079                          */
2080                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2081 #endif
2082                         if (timeout) 
2083                                 tcp_time_wait(sk);
2084                         release_sock(sk);
2085                         return; /* break causes a double release - messy */
2086                 case TCP_TIME_WAIT:
2087                 case TCP_LAST_ACK:
2088                         /*
2089                          * A timeout from these states terminates the TCB.
2090                          */
2091                         if (timeout) 
2092                         {
2093                                 sk->state = TCP_CLOSE;
2094                         }
2095                         release_sock(sk);
2096                         return;
2097                 case TCP_LISTEN:
2098                         sk->state = TCP_CLOSE;
2099                         release_sock(sk);
2100                         return;
2101                 case TCP_CLOSE:
2102                         release_sock(sk);
2103                         return;
2104                 case TCP_CLOSE_WAIT:
2105                 case TCP_ESTABLISHED:
2106                 case TCP_SYN_SENT:
2107                 case TCP_SYN_RECV:
                             /*
                              * Build the FIN segment.  This case does not
                              * return on success; it leaves the switch and
                              * reaches the release_sock() at the bottom.
                              */
2108                         prot =(struct proto *)sk->prot;
2109                         th =(struct tcphdr *)&sk->dummy_th;
2110                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2111                         if (buff == NULL) 
2112                         {
2113                                 /* This will force it to try again later. */
2114                                 /* Or it would have if someone released the socket
2115                                    first. Anyway it might work now */
                                     /*
                                      * sk is still written to after release_sock().
                                      * The state becomes TCP_ESTABLISHED unless it
                                      * is CLOSE_WAIT, so SYN_SENT and SYN_RECV are
                                      * changed as well.  A TIME_CLOSE timer of 100
                                      * is armed for the retry.
                                      */
2116                                 release_sock(sk);
2117                                 if (sk->state != TCP_CLOSE_WAIT)
2118                                         sk->state = TCP_ESTABLISHED;
2119                                 reset_timer(sk, TIME_CLOSE, 100);
2120                                 return;
2121                         }
2122                         buff->sk = sk;
2123                         buff->free = 1;
2124                         buff->len = sizeof(*t1);
2125                         buff->localroute = sk->localroute;
2126                         t1 =(struct tcphdr *) buff->data;
2127         
2128                         /*
2129                          *      Put in the IP header and routing stuff. 
2130                          */
2131                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2132                                          IPPROTO_TCP, sk->opt,
2133                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2134                         if (tmp < 0) 
2135                         {
2136                                 kfree_skb(buff,FREE_WRITE);
2137 
2138                                 /*
2139                                  * Enter FIN_WAIT1 to await completion of
2140                                  * written out data and ACK to our FIN.
2141                                  */
2142 
                                     /*
                                      * NOTE(review): CLOSE_WAIT ends up in FIN_WAIT2
                                      * here, whereas the normal path below moves
                                      * CLOSE_WAIT to LAST_ACK - confirm intended.
                                      */
2143                                 if(sk->state==TCP_ESTABLISHED)
2144                                         sk->state=TCP_FIN_WAIT1;
2145                                 else
2146                                         sk->state=TCP_FIN_WAIT2;
2147                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2148                                 if(timeout)
2149                                         tcp_time_wait(sk);
2150 
2151                                 release_sock(sk);
2152                                 return;
2153                         }
2154 
                             /* The TCP header follows what build_header() wrote. */
2155                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2156                         buff->len += tmp;
2157                         buff->dev = dev;
2158                         memcpy(t1, th, sizeof(*t1));
                             /*
                              * The segment carries the current write_seq, which
                              * is then advanced by one; buff->h.seq records the
                              * value after the increment.
                              */
2159                         t1->seq = ntohl(sk->write_seq);
2160                         sk->write_seq++;
2161                         buff->h.seq = sk->write_seq;
2162                         t1->ack = 1;
2163         
2164                         /* 
2165                          *      Ack everything immediately from now on. 
2166                          */
2167 
2168                         sk->delay_acks = 0;
2169                         t1->ack_seq = ntohl(sk->acked_seq);
2170                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2171                         t1->fin = 1;
                             /* rst is set together with fin when unread data was discarded above. */
2172                         t1->rst = need_reset;
2173                         t1->doff = sizeof(*t1)/4;
2174                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2175 
2176                         tcp_statistics.TcpOutSegs++;
2177         
                             /*
                              * With an empty write queue the segment is sent
                              * right away; otherwise it is appended behind the
                              * data still waiting there.
                              */
2178                         if (skb_peek(&sk->write_queue) == NULL) 
2179                         {
2180                                 sk->sent_seq = sk->write_seq;
2181                                 prot->queue_xmit(sk, dev, buff, 0);
2182                         } 
2183                         else 
2184                         {
2185                                 reset_timer(sk, TIME_WRITE, sk->rto);
2186                                 if (buff->next != NULL) 
2187                                 {
2188                                         printk("tcp_close: next != NULL\n");
2189                                         skb_unlink(buff);
2190                                 }
2191                                 skb_queue_tail(&sk->write_queue, buff);
2192                         }
2193 
2194                         /*
2195                          * If established (normal close), enter FIN_WAIT1.
2196                          * If in CLOSE_WAIT, enter LAST_ACK
2197                          * If in CLOSING, remain in CLOSING
2198                          * otherwise enter FIN_WAIT2
2199                          */
2200 
2201                         if (sk->state == TCP_ESTABLISHED)
2202                             sk->state = TCP_FIN_WAIT1;
2203                         else if (sk->state == TCP_CLOSE_WAIT)
2204                             sk->state = TCP_LAST_ACK;
2205                         else if (sk->state != TCP_CLOSING)
2206                             sk->state = TCP_FIN_WAIT2;
2207         }
             /* Reached from the FIN-sending case, or from a state the switch does not list. */
2208         release_sock(sk);
2209 }
2210 
2211 
2212 /*
2213  * This routine takes stuff off of the write queue,
2214  * and puts it in the xmit queue.
2215  */
2216 static void
2217 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2218 {
2219   struct sk_buff *skb;
2220 
2221   /* The bytes will have to remain here. In time closedown will
2222      empty the write queue and all will be happy */
2223   if(sk->zapped)
2224         return;
2225 
2226   while((skb = skb_peek(&sk->write_queue)) != NULL &&
2227         before(skb->h.seq, sk->window_seq + 1) &&
2228         (sk->retransmits == 0 ||
2229          sk->timeout != TIME_WRITE ||
2230          before(skb->h.seq, sk->rcv_ack_seq + 1))
2231         && sk->packets_out < sk->cong_window) {
2232                 IS_SKB(skb);
2233                 skb_unlink(skb);
2234                 /* See if we really need to send the packet. */
2235                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) {
2236                         sk->retransmits = 0;
2237                         kfree_skb(skb, FREE_WRITE);
2238                         if (!sk->dead) sk->write_space(sk);
2239                 } else {
2240                         sk->sent_seq = skb->h.seq;
2241                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2242                 }
2243         }
2244 }
2245 
2246 
2247 /*
2248  * This routine sorts the send list, and resets the
2249  * sk->send_head and sk->send_tail pointers.
2250  */
2251 void
2252 sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2253 {
2254   struct sk_buff *list = NULL;
2255   struct sk_buff *skb,*skb2,*skb3;
2256 
2257   for (skb = sk->send_head; skb != NULL; skb = skb2) {
2258         skb2 = skb->link3;
2259         if (list == NULL || before (skb2->h.seq, list->h.seq)) {
2260                 skb->link3 = list;
2261                 sk->send_tail = skb;
2262                 list = skb;
2263         } else {
2264                 for (skb3 = list; ; skb3 = skb3->link3) {
2265                         if (skb3->link3 == NULL ||
2266                             before(skb->h.seq, skb3->link3->h.seq)) {
2267                                 skb->link3 = skb3->link3;
2268                                 skb3->link3 = skb;
2269                                 if (skb->link3 == NULL) sk->send_tail = skb;
2270                                 break;
2271                         }
2272                 }
2273         }
2274   }
2275   sk->send_head = list;
2276 }
2277   
2278 
2279 /*
2280  * This routine deals with incoming acks, but not outgoing ones.
2281  */
2282 
2283 static int
2284 tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2285 {
2286   unsigned long ack;
2287   int flag = 0;
2288   /* 
2289    * 1 - there was data in packet as well as ack or new data is sent or 
2290    *     in shutdown state
2291    * 2 - data from retransmit queue was acked and removed
2292    * 4 - window shrunk or data from retransmit queue was acked and removed
2293    */
2294 
2295   if(sk->zapped)
2296         return(1);      /* Dead, cant ack any more so why bother */
2297 
2298   ack = ntohl(th->ack_seq);
2299   if (ntohs(th->window) > sk->max_window) {
2300         sk->max_window = ntohs(th->window);
2301 #ifdef CONFIG_INET_PCTCP
2302         sk->mss = min(sk->max_window>>1, sk->mtu);
2303 #else
2304         sk->mss = min(sk->max_window, sk->mtu);
2305 #endif  
2306   }
2307 
2308   if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2309         sk->retransmits = 0;
2310 
2311 /* not quite clear why the +1 and -1 here, and why not +1 in next line */
2312   if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
2313         if (after(ack, sk->sent_seq) ||
2314            (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
2315                 return(0);
2316         }
2317         if (sk->keepopen) {
2318                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2319         }
2320         return(1);
2321   }
2322 
2323   if (len != th->doff*4) flag |= 1;
2324 
2325   /* See if our window has been shrunk. */
2326   if (after(sk->window_seq, ack+ntohs(th->window))) {
2327         /*
2328          * We may need to move packets from the send queue
2329          * to the write queue, if the window has been shrunk on us.
2330          * The RFC says you are not allowed to shrink your window
2331          * like this, but if the other end does, you must be able
2332          * to deal with it.
2333          */
2334         struct sk_buff *skb;
2335         struct sk_buff *skb2;
2336         struct sk_buff *wskb = NULL;
2337   
2338         skb2 = sk->send_head;
2339         sk->send_head = NULL;
2340         sk->send_tail = NULL;
2341 
2342         flag |= 4;
2343 
2344         sk->window_seq = ack + ntohs(th->window);
2345         cli();
2346         while (skb2 != NULL) {
2347                 skb = skb2;
2348                 skb2 = skb->link3;
2349                 skb->link3 = NULL;
2350                 if (after(skb->h.seq, sk->window_seq)) {
2351                         if (sk->packets_out > 0) sk->packets_out--;
2352                         /* We may need to remove this from the dev send list. */
2353                         if (skb->next != NULL) {
2354                                 skb_unlink(skb);                                
2355                         }
2356                         /* Now add it to the write_queue. */
2357                         if (wskb == NULL)
2358                                 skb_queue_head(&sk->write_queue,skb);
2359                         else
2360                                 skb_append(wskb,skb);
2361                         wskb = skb;
2362                 } else {
2363                         if (sk->send_head == NULL) {
2364                                 sk->send_head = skb;
2365                                 sk->send_tail = skb;
2366                         } else {
2367                                 sk->send_tail->link3 = skb;
2368                                 sk->send_tail = skb;
2369                         }
2370                         skb->link3 = NULL;
2371                 }
2372         }
2373         sti();
2374   }
2375 
2376   if (sk->send_tail == NULL || sk->send_head == NULL) {
2377         sk->send_head = NULL;
2378         sk->send_tail = NULL;
2379         sk->packets_out= 0;
2380   }
2381 
2382   sk->window_seq = ack + ntohs(th->window);
2383 
2384   /* We don't want too many packets out there. */
2385   if (sk->timeout == TIME_WRITE && 
2386       sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {
2387 /* 
2388  * This is Jacobson's slow start and congestion avoidance. 
2389  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2390  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2391  * counter and increment it once every cwnd times.  It's possible
2392  * that this should be done only if sk->retransmits == 0.  I'm
2393  * interpreting "new data is acked" as including data that has
2394  * been retransmitted but is just now being acked.
2395  */
2396         if (sk->cong_window < sk->ssthresh)  
2397           /* in "safe" area, increase */
2398           sk->cong_window++;
2399         else {
2400           /* in dangerous area, increase slowly.  In theory this is
2401              sk->cong_window += 1 / sk->cong_window
2402            */
2403           if (sk->cong_count >= sk->cong_window) {
2404             sk->cong_window++;
2405             sk->cong_count = 0;
2406           } else 
2407             sk->cong_count++;
2408         }
2409   }
2410 
2411   sk->rcv_ack_seq = ack;
2412 
2413   /*
2414    * if this ack opens up a zero window, clear backoff.  It was
2415    * being used to time the probes, and is probably far higher than
2416    * it needs to be for normal retransmission
2417    */
2418   if (sk->timeout == TIME_PROBE0) {
2419         if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2420             ! before (sk->window_seq, sk->write_queue.next->h.seq)) {
2421           sk->retransmits = 0;
2422           sk->backoff = 0;
2423           /* recompute rto from rtt.  this eliminates any backoff */
2424           sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2425           if (sk->rto > 120*HZ)
2426             sk->rto = 120*HZ;
2427           if (sk->rto < 2)      /* Was 1*HZ */
2428             sk->rto = 2;
2429         }
2430   }
2431 
2432   /* See if we can take anything off of the retransmit queue. */
2433   while(sk->send_head != NULL) {
2434         /* Check for a bug. */
2435         if (sk->send_head->link3 &&
2436             after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
2437                 printk("INET: tcp.c: *** bug send_list out of order.\n");
2438                 sort_send(sk);
2439         }
2440 
2441         if (before(sk->send_head->h.seq, ack+1)) {
2442                 struct sk_buff *oskb;
2443 
2444                 if (sk->retransmits) {
2445 
2446                   /* we were retransmitting.  don't count this in RTT est */
2447                   flag |= 2;
2448 
2449                   /*
2450                    * even though we've gotten an ack, we're still
2451                    * retransmitting as long as we're sending from
2452                    * the retransmit queue.  Keeping retransmits non-zero
2453                    * prevents us from getting new data interspersed with
2454                    * retransmissions.
2455                    */
2456 
2457                   if (sk->send_head->link3)
2458                     sk->retransmits = 1;
2459                   else
2460                     sk->retransmits = 0;
2461 
2462                 }
2463 
2464                 /*
2465                  * Note that we only reset backoff and rto in the
2466                  * rtt recomputation code.  And that doesn't happen
2467                  * if there were retransmissions in effect.  So the
2468                  * first new packet after the retransmissions is
2469                  * sent with the backoff still in effect.  Not until
2470                  * we get an ack from a non-retransmitted packet do
2471                  * we reset the backoff and rto.  This allows us to deal
2472                  * with a situation where the network delay has increased
2473                  * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2474                  */
2475 
2476                 /* We have one less packet out there. */
2477                 if (sk->packets_out > 0) sk->packets_out --;
2478                 /* Wake up the process, it can probably write more. */
2479                 if (!sk->dead) sk->write_space(sk);
2480 
2481                 oskb = sk->send_head;
2482 
2483                 if (!(flag&2)) {
2484                   long m;
2485 
2486                   /* The following amusing code comes from Jacobson's
2487                    * article in SIGCOMM '88.  Note that rtt and mdev
2488                    * are scaled versions of rtt and mean deviation.
2489                    * This is designed to be as fast as possible 
2490                    * m stands for "measurement".
2491                    */
2492 
2493                   m = jiffies - oskb->when;  /* RTT */
2494                   if(m<=0)
2495                         m=1;                 /* IS THIS RIGHT FOR <0 ??? */
2496                   m -= (sk->rtt >> 3);       /* m is now error in rtt est */
2497                   sk->rtt += m;              /* rtt = 7/8 rtt + 1/8 new */
2498                   if (m < 0)
2499                     m = -m;                  /* m is now abs(error) */
2500                   m -= (sk->mdev >> 2);      /* similar update on mdev */
2501                   sk->mdev += m;             /* mdev = 3/4 mdev + 1/4 new */
2502 
2503                   /* now update timeout.  Note that this removes any backoff */
2504                   sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2505                   if (sk->rto > 120*HZ)
2506                     sk->rto = 120*HZ;
2507                   if (sk->rto < 2)      /* Was 1*HZ */
2508                     sk->rto = 2;
2509                   sk->backoff = 0;
2510 
2511                 }
2512                 flag |= (2|4);
2513 
2514                 cli();
2515 
2516                 oskb = sk->send_head;
2517                 IS_SKB(oskb);
2518                 sk->send_head = oskb->link3;
2519                 if (sk->send_head == NULL) {
2520                         sk->send_tail = NULL;
2521                 }
2522 
2523                 /* We may need to remove this from the dev send list. */
2524                 if (oskb->next)
2525                         skb_unlink(oskb);
2526                 sti();
2527                 kfree_skb(oskb, FREE_WRITE); /* write. */
2528                 if (!sk->dead) sk->write_space(sk);
2529         } else {
2530                 break;
2531         }
2532   }
2533 
2534   /*
2535    * Maybe we can take some stuff off of the write queue,
2536    * and put it onto the xmit queue.
2537    */
2538   if (skb_peek(&sk->write_queue) != NULL) {
2539         if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2540                 (sk->retransmits == 0 || 
2541                  sk->timeout != TIME_WRITE ||
2542                  before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2543                 && sk->packets_out < sk->cong_window) {
2544                 flag |= 1;
2545                 tcp_write_xmit(sk);
2546         } else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2547                    sk->send_head == NULL &&
2548                    sk->ack_backlog == 0 &&
2549                    sk->state != TCP_TIME_WAIT) {
2550                 reset_timer(sk, TIME_PROBE0, sk->rto);
2551         }               
2552   } else {
2553         if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2554             sk->state != TCP_TIME_WAIT && !sk->keepopen) {
2555                 if (!sk->dead) sk->write_space(sk);
2556 
2557                 if (sk->keepopen)
2558                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2559                 else
2560                         delete_timer(sk);
2561         } else {
2562                 if (sk->state != (unsigned char) sk->keepopen) {
2563                         reset_timer(sk, TIME_WRITE, sk->rto);
2564                 }
2565                 if (sk->state == TCP_TIME_WAIT) {
2566                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2567                 }
2568         }
2569   }
2570 
2571   if (sk->packets_out == 0 && sk->partial != NULL &&
2572       skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) {
2573         flag |= 1;
2574         tcp_send_partial(sk);
2575   }
2576 
2577   /*
2578    * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2579    * we are now waiting for an acknowledge to our FIN.  The other end is
2580    * already in TIME_WAIT.
2581    *
2582    * Move to TCP_CLOSE on success.
2583    */
2584 
2585   if (sk->state == TCP_LAST_ACK) {
2586         if (!sk->dead)
2587                 sk->state_change(sk);
2588         if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) {
2589                 flag |= 1;
2590                 sk->state = TCP_CLOSE;
2591                 sk->shutdown = SHUTDOWN_MASK;
2592         }
2593   }
2594 
2595   /*
2596    * Incoming ACK to a FIN we sent in the case of our initiating the close.
2597    *
2598    * Move to FIN_WAIT2 to await a FIN from the other end.
2599    */
2600 
2601   if (sk->state == TCP_FIN_WAIT1) {
2602         if (!sk->dead) 
2603                 sk->state_change(sk);
2604         if (sk->rcv_ack_seq == sk->write_seq) {
2605                 flag |= 1;
2606                 if (sk->acked_seq != sk->fin_seq) {
2607                         tcp_time_wait(sk);
2608                 } else {
2609                         sk->shutdown = SHUTDOWN_MASK;
2610                         sk->state = TCP_FIN_WAIT2;
2611                 }
2612         }
2613   }
2614 
2615   /*
2616    * Incoming ACK to a FIN we sent in the case of a simultaneous close.
2617    *
2618    * Move to TIME_WAIT
2619    */
2620 
2621   if (sk->state == TCP_CLOSING) {
2622         if (!sk->dead) 
2623                 sk->state_change(sk);
2624         if (sk->rcv_ack_seq == sk->write_seq) {
2625                 flag |= 1;
2626                 tcp_time_wait(sk);
2627         }
2628   }
2629 
2630 /*
2631  * I make no guarantees about the first clause in the following
2632  * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2633  * what conditions "!flag" would be true.  However I think the rest
2634  * of the conditions would prevent that from causing any
2635  * unnecessary retransmission. 
2636  *   Clearly if the first packet has expired it should be 
2637  * retransmitted.  The other alternative, "flag&2 && retransmits", is
2638  * harder to explain:  You have to look carefully at how and when the
2639  * timer is set and with what timeout.  The most recent transmission always
2640  * sets the timer.  So in general if the most recent thing has timed
2641  * out, everything before it has as well.  So we want to go ahead and
2642  * retransmit some more.  If we didn't explicitly test for this
2643  * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2644  * would not be true.  If you look at the pattern of timing, you can
2645  * show that rto is increased fast enough that the next packet would
2646  * almost never be retransmitted immediately.  Then you'd end up
2647  * waiting for a timeout to send each packet on the retransmission
2648  * queue.  With my implementation of the Karn sampling algorithm,
2649  * the timeout would double each time.  The net result is that it would
2650  * take a hideous amount of time to recover from a single dropped packet.
2651  * It's possible that there should also be a test for TIME_WRITE, but
2652  * I think as long as "send_head != NULL" and "retransmit" is on, we've
2653  * got to be in real retransmission mode.
2654  *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2655  * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2656  * As long as no further losses occur, this seems reasonable.
2657  */
2658 
2659   if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2660       (((flag&2) && sk->retransmits) ||
2661        (sk->send_head->when + sk->rto < jiffies))) {
2662         ip_do_retransmit(sk, 1);
2663         reset_timer(sk, TIME_WRITE, sk->rto);
2664       }
2665 
2666   return(1);
2667 }
2668 
2669 
2670 /*
2671  * Handle the data carried by an incoming segment: link the buffer
2672  * into sk->receive_queue in sequence order, advance sk->acked_seq
2673  * over whatever has become contiguous, and send or schedule an ack.
      *
      *   skb   - buffer holding the segment; its TCP header is skb->h.th.
      *   sk    - socket the segment was received for.
      *   saddr - passed through unchanged to tcp_send_ack().
      *   len   - segment length; th->doff*4 is subtracted from it and
      *           the result is stored in skb->len.
      *
      * The buffer is always consumed here: it is either linked into
      * sk->receive_queue or released with kfree_skb().  Every path
      * returns 0.
2674  */
2675 static int
2676 tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
2677          unsigned long saddr, unsigned short len)
2678 {
2679   struct sk_buff *skb1, *skb2;
2680   struct tcphdr *th;
2681   int dup_dumped=0;      /* set when skb replaced a queued buffer with the same seq */
2682 
2683   th = skb->h.th;
2684   skb->len = len -(th->doff*4);
2685 
2686   sk->bytes_rcv += skb->len;
2687   if (skb->len == 0 && !th->fin && !th->urg && !th->psh) {
2688         /*
              * No data and none of FIN/URG/PSH set: nothing to queue.
              * Only answer when the segment is not itself an ack, so that
              * we don't keep passing ack's back and forth.
              */
2689         if (!th->ack) tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
2690         kfree_skb(skb, FREE_READ);
2691         return(0);
2692   }
2693 
       /*
        * Data arrived although RCV_SHUTDOWN is set: move acked_seq past
        * the segment, send a reset, and close the socket with EPIPE.
        */
2694   if (sk->shutdown & RCV_SHUTDOWN && skb->len!=0 /* Added AGC */) {
2695         sk->acked_seq = th->seq + skb->len + th->syn + th->fin;
2696         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2697                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2698         tcp_statistics.TcpEstabResets++;
2699         sk->state = TCP_CLOSE;
2700         sk->err = EPIPE;
2701         sk->shutdown = SHUTDOWN_MASK;
2702         kfree_skb(skb, FREE_READ);
2703         if (!sk->dead) sk->state_change(sk);
2704         return(0);
2705   }
2706 
2707   /*
2708    * Now we have to walk the chain, and figure out where this one
2709    * goes into it.  This is set up so that the last packet we received
2710    * will be the first one we look at, that way if everything comes
2711    * in order, there will be no performance loss, and if they come
2712    * out of order we will be able to fit things in nicely.
2713    */
2714 
2715   /*
        * Start at the tail (receive_queue.prev) and walk towards the head
        * via ->prev.  skb1 ends up as the buffer skb was linked behind, or
        * NULL when the queue was empty or skb replaced a duplicate.
        */
2716   if (skb_peek(&sk->receive_queue) == NULL) {
2717         skb_queue_head(&sk->receive_queue,skb);
2718         skb1= NULL;
2719   } else {
2720         for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) {
2721                 if(sk->debug)
2722                 {
2723                         printk("skb1=%p :", skb1);
2724                         printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
2725                         printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
2726                         printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
2727                                         sk->acked_seq);
2728                 }
                     /*
                      * Same starting sequence number and at least as much
                      * data: put skb in skb1's place and free skb1.
                      */
2729                 if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
2730                 {
2731                         skb_append(skb1,skb);
2732                         skb_unlink(skb1);
2733                         kfree_skb(skb1,FREE_READ);
2734                         dup_dumped=1;
2735                         skb1=NULL;
2736                         break;
2737                 }
                     /* th->seq+1 is after skb1's seq: link skb directly behind skb1. */
2738                 if (after(th->seq+1, skb1->h.th->seq))
2739                 {
2740                         skb_append(skb1,skb);
2741                         break;
2742                 }
                     /* Reached the head without finding a place: skb becomes the new head. */
2743                 if (skb1 == skb_peek(&sk->receive_queue))
2744                 {
2745                         skb_queue_head(&sk->receive_queue, skb);
2746                         break;
2747                 }
2748         }
2749   }
2750 
       /*
        * The header's ack_seq field is reused to hold the sequence number
        * that follows this segment; SYN and FIN each add one to it.
        */
2751   th->ack_seq = th->seq + skb->len;
2752   if (th->syn) th->ack_seq++;
2753   if (th->fin) th->ack_seq++;
2754 
2755   if (before(sk->acked_seq, sk->copied_seq)) {
2756         printk("*** tcp.c:tcp_data bug acked < copied\n");
2757         sk->acked_seq = sk->copied_seq;
2758   }
2759 
2760   /* Now figure out if we can ack anything. */
2761   if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) {
2762       if (before(th->seq, sk->acked_seq+1)) {
2763                 int newwindow;
2764 
                     /*
                      * The segment reaches past acked_seq: shrink sk->window
                      * by the newly covered amount (not below zero) and move
                      * acked_seq to the end of the segment.
                      */
2765                 if (after(th->ack_seq, sk->acked_seq)) {
2766                         newwindow = sk->window -
2767                                        (th->ack_seq - sk->acked_seq);
2768                         if (newwindow < 0)
2769                                 newwindow = 0;  
2770                         sk->window = newwindow;
2771                         sk->acked_seq = th->ack_seq;
2772                 }
2773                 skb->acked = 1;
2774 
2775                 /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */
2776                 if (skb->h.th->fin) {
2777                         if (!sk->dead) sk->state_change(sk);
2778                         sk->shutdown |= RCV_SHUTDOWN;
2779                 }
2780           
                     /*
                      * Apply the same update to the buffers queued behind skb,
                      * stopping at the first one that starts beyond acked_seq.
                      */
2781                 for(skb2 = skb->next;
2782                     skb2 != (struct sk_buff *)&sk->receive_queue;
2783                     skb2 = skb2->next) {
2784                         if (before(skb2->h.th->seq, sk->acked_seq+1)) {
2785                                 if (after(skb2->h.th->ack_seq, sk->acked_seq))
2786                                 {
2787                                         newwindow = sk->window -
2788                                          (skb2->h.th->ack_seq - sk->acked_seq);
2789                                         if (newwindow < 0)
2790                                                 newwindow = 0;  
2791                                         sk->window = newwindow;
2792                                         sk->acked_seq = skb2->h.th->ack_seq;
2793                                 }
2794                                 skb2->acked = 1;
2795 
2796                                 /*
2797                                  * When we ack the fin, we turn on
2798                                  * the RCV_SHUTDOWN flag.
2799                                  */
2800                                 if (skb2->h.th->fin) {
2801                                         sk->shutdown |= RCV_SHUTDOWN;
2802                                         if (!sk->dead) sk->state_change(sk);
2803                                 }
2804 
2805                                 /* Force an immediate ack. */
2806                                 sk->ack_backlog = sk->max_ack_backlog;
2807                         } else {
2808                                 break;
2809                         }
2810                 }
2811 
2812                 /*
2813                  * This also takes care of updating the window.
2814                  * This if statement needs to be simplified.
                      *
                      * The "ack now" branch is empty because its tcp_send_ack()
                      * call is commented out; skb->acked was set above, so the
                      * ack for that case goes out from the final else branch of
                      * the "if (!skb->acked)" test further down.
                      *
                      * NOTE(review): the delayed branch also leaves skb->acked
                      * set, so it reaches that same tcp_send_ack() call as well
                      * as queueing an ack here -- confirm this is intended.
2815                  */
2816                 if (!sk->delay_acks ||
2817                     sk->ack_backlog >= sk->max_ack_backlog || 
2818                     sk->bytes_rcv > sk->max_unacked || th->fin) {
2819 /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
2820                 } else {
2821                         sk->ack_backlog++;
2822                         if(sk->debug)
2823                                 printk("Ack queued.\n");
2824                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2825                 }
2826         }
2827   }
2828 
2829   /*
2830    * If we've missed a packet, send an ack.
2831    * Also start a timer to send another.
        * (skb->acked is still clear here, i.e. the code above did not
        * mark this buffer as covered by acked_seq.)
2832    */
2833   if (!skb->acked) {
2834         /*
2835          * This is important.  If we don't have much room left,
2836          * we need to throw out a few packets so we have a good
2837          * window.  Note that mtu is used, not mss, because mss is really
2838          * for the send side.  He could be sending us stuff as large as mtu.
              *
              * NOTE(review): buffers are taken from the head of the queue,
              * and skb itself can be that unacked head (e.g. when it was the
              * only buffer queued).  In that case it is unlinked and freed
              * by this loop, yet th -- loaded from skb->h.th -- is still
              * handed to tcp_send_ack() below.  Confirm th does not point
              * into storage released by kfree_skb().
2839          */
2840         while (sk->prot->rspace(sk) < sk->mtu) {
2841                 skb1 = skb_peek(&sk->receive_queue);
2842                 if (skb1 == NULL) {
2843                         printk("INET: tcp.c:tcp_data memory leak detected.\n");
2844                         break;
2845                 }
2846 
2847                 /* Don't throw out something that has been acked. */
2848                 if (skb1->acked) {
2849                         break;
2850                 }
2851                 
2852                 skb_unlink(skb1);
2853                 kfree_skb(skb1, FREE_READ);
2854         }
2855         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
2856         sk->ack_backlog++;
2857         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2858   } else {
2859         /* This buffer is marked acked: send the ack for it now. */
2860         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
2861   }
2862 
2863   /* Now tell the user we may have some data. */
2864   if (!sk->dead) {
2865         if(sk->debug)
2866                 printk("Data wakeup.\n");
2867         sk->data_ready(sk,0);
2868   } 
2869 
       /* Only compiled when NOTDEF is defined. */
2870 #ifdef NOTDEF   /* say what?  this is handled by tcp_ack() */
2871 
2872   if (sk->state == TCP_FIN_WAIT2 &&
2873       sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->write_seq) {
2874 /*      tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); */
2875         sk->shutdown = SHUTDOWN_MASK;
2876         sk->state = TCP_LAST_ACK;
2877         if (!sk->dead) sk->state_change(sk);
2878   }
2879 #endif
2880 
2881   return(0);
2882 }
2883 
2884 
2885 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
2886 {
2887         unsigned long ptr = ntohs(th->urg_ptr);
2888 
2889         if (ptr)
2890                 ptr--;
2891         ptr += th->seq;
2892 
2893         /* ignore urgent data that we've already seen and read */
2894         if (after(sk->copied_seq+1, ptr))
2895                 return;
2896 
2897         /* do we already have a newer (or duplicate) urgent pointer? */
2898         if (sk->urg_data && !after(ptr, sk->urg_seq))
2899                 return;
2900 
2901         /* tell the world about our new urgent pointer */
2902         if (sk->proc != 0) {
2903                 if (sk->proc > 0) {
2904                         kill_proc(sk->proc, SIGURG, 1);
2905                 } else {
2906                         kill_pg(-sk->proc, SIGURG, 1);
2907                 }
2908         }
2909         sk->urg_data = URG_NOTYET;
2910         sk->urg_seq = ptr;
2911 }
2912 
2913 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
2914         unsigned long saddr, unsigned long len)
2915 {
2916         unsigned long ptr;
2917 
2918         /* check if we get a new urgent pointer */
2919         if (th->urg)
2920                 tcp_check_urg(sk,th);
2921 
2922         /* do we wait for any urgent data? */
2923         if (sk->urg_data != URG_NOTYET)
2924                 return 0;
2925 
2926         /* is the urgent pointer pointing into this packet? */
2927         ptr = sk->urg_seq - th->seq + th->doff*4;
2928         if (ptr >= len)
2929                 return 0;
2930 
2931         /* ok, got the correct packet, update info */
2932         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
2933         if (!sk->dead)
2934                 sk->data_ready(sk,0);
2935         return 0;
2936 }
2937 
2938 
2939 /*
2940  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
2941  *
2942  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
2943  *  (and thence onto LAST-ACK and finally, CLOSED, we never enter
2944  *  TIME-WAIT)
2945  *
2946  *  If we are in FINWAIT-1, a received FIN indicates simultanious
2947  *  close and we go into CLOSING (and later onto TIME-WAIT)
2948  *
2949  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
2950  *
2951  */
2952  
2953 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
2954          unsigned long saddr, struct device *dev)
2955 {
2956         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
2957 
2958         if (!sk->dead) 
2959         {
2960                 sk->state_change(sk);
2961         }
2962 
2963         switch(sk->state) 
2964         {
2965                 case TCP_SYN_RECV:
2966                 case TCP_SYN_SENT:
2967                 case TCP_ESTABLISHED:
2968                         /*
2969                          * move to CLOSE_WAIT, tcp_data() already handled
2970                          * sending the ack.
2971                          */
2972                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
2973                         /*sk->fin_seq = th->seq+1;*/
2974                         tcp_statistics.TcpCurrEstab--;
2975                         sk->state = TCP_CLOSE_WAIT;
2976                         if (th->rst)
2977                                 sk->shutdown = SHUTDOWN_MASK;
2978                         break;
2979 
2980                 case TCP_CLOSE_WAIT:
2981                 case TCP_CLOSING:
2982                         /*
2983                          * received a retransmission of the FIN, do
2984                          * nothing.
2985                          */
2986                         break;
2987                 case TCP_TIME_WAIT:
2988                         /*
2989                          * received a retransmission of the FIN,
2990                          * restart the TIME_WAIT timer.
2991                          */
2992                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2993                         return(0);
2994                 case TCP_FIN_WAIT1:
2995                         /*
2996                          * This case occurs when a simultanious close
2997                          * happens, we must ack the received FIN and
2998                          * enter the CLOSING state.
2999                          *
3000                          * XXX timeout not set properly
3001                          */
3002 
3003                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3004                         /*sk->fin_seq = th->seq+1;*/
3005                         sk->state = TCP_CLOSING;
3006                         break;
3007                 case TCP_FIN_WAIT2:
3008                         /*
3009                          * received a FIN -- send ACK and enter TIME_WAIT
3010                          */
3011                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3012                         /*sk->fin_seq = th->seq+1;*/
3013                         sk->state = TCP_TIME_WAIT;
3014                         break;
3015                 case TCP_CLOSE:
3016                         /*
3017                          * already in CLOSE
3018                          */
3019                         break;
3020                 default:
3021                         sk->state = TCP_LAST_ACK;
3022         
3023                         /* Start the timers. */
3024                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3025                         return(0);
3026         }
3027         sk->ack_backlog++;
3028 
3029         return(0);
3030 }
3031 
3032 
3033 /* This will accept the next outstanding connection. */
3034 static struct sock *
3035 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3036 {
3037   struct sock *newsk;
3038   struct sk_buff *skb;
3039   
3040   /*
3041    * We need to make sure that this socket is listening,
3042    * and that it has something pending.
3043    */
3044   if (sk->state != TCP_LISTEN) {
3045         sk->err = EINVAL;
3046         return(NULL); 
3047   }
3048 
3049   /* avoid the race. */
3050   cli();
3051   sk->inuse = 1;
3052   while((skb = skb_dequeue(&sk->receive_queue)) == NULL) {
3053         if (flags & O_NONBLOCK) {
3054                 sti();
3055                 release_sock(sk);
3056                 sk->err = EAGAIN;
3057                 return(NULL);
3058         }
3059 
3060         release_sock(sk);
3061         interruptible_sleep_on(sk->sleep);
3062         if (current->signal & ~current->blocked) {
3063                 sti();
3064                 sk->err = ERESTARTSYS;
3065                 return(NULL);
3066         }
3067         sk->inuse = 1;
3068   }
3069   sti();
3070 
3071   /* Now all we need to do is return skb->sk. */
3072   newsk = skb->sk;
3073 
3074   kfree_skb(skb, FREE_READ);
3075   sk->ack_backlog--;
3076   release_sock(sk);
3077   return(newsk);
3078 }
3079 
3080 
3081 /*
3082  *      This will initiate an outgoing connection. 
3083  */
3084  
3085 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3086 {
3087         struct sk_buff *buff;
3088         struct sockaddr_in sin;
3089         struct device *dev=NULL;
3090         unsigned char *ptr;
3091         int tmp;
3092         struct tcphdr *t1;
3093         int err;
3094         struct rtable *rt;
3095 
3096         if (sk->state != TCP_CLOSE) 
3097                 return(-EISCONN);
3098         if (addr_len < 8) 
3099                 return(-EINVAL);
3100 
3101         err=verify_area(VERIFY_READ, usin, addr_len);
3102         if(err)
3103                 return err;
3104         
3105         memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len));
3106 
3107         if (sin.sin_family && sin.sin_family != AF_INET) 
3108                 return(-EAFNOSUPPORT);
3109 
3110         /*
3111          *      connect() to INADDR_ANY means loopback (BSD'ism).
3112          */
3113         
3114         if(sin.sin_addr.s_addr==INADDR_ANY)
3115                 sin.sin_addr.s_addr=ip_my_addr();
3116                   
3117         /*
3118          *      Don't want a TCP connection going to a broadcast address 
3119          */
3120 
3121         if (ip_chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) 
3122         { 
3123                 return -ENETUNREACH;
3124         }
3125   
3126         /*
3127          *      Connect back to the same socket: Blows up so disallow it 
3128          */
3129 
3130         if(sk->saddr == sin.sin_addr.s_addr && sk->num==ntohs(sin.sin_port))
3131                 return -EBUSY;
3132 
3133         sk->inuse = 1;
3134         sk->daddr = sin.sin_addr.s_addr;
3135         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3136         sk->window_seq = sk->write_seq;
3137         sk->rcv_ack_seq = sk->write_seq -1;
3138         sk->err = 0;
3139         sk->dummy_th.dest = sin.sin_port;
3140         release_sock(sk);
3141 
3142         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3143         if (buff == NULL) 
3144         {
3145                 return(-ENOMEM);
3146         }
3147         sk->inuse = 1;
3148         buff->len = 24;
3149         buff->sk = sk;
3150         buff->free = 1;
3151         buff->localroute = sk->localroute;
3152         
3153         t1 = (struct tcphdr *) buff->data;
3154 
3155         /*
3156          *      Put in the IP header and routing stuff. 
3157          */
3158          
3159         rt=ip_rt_route(sk->daddr, NULL, NULL);
3160         
3161 
3162         /*
3163          *      We need to build the routing stuff from the things saved in skb. 
3164          */
3165 
3166         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3167                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3168         if (tmp < 0) 
3169         {
3170                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3171                 release_sock(sk);
3172                 return(-ENETUNREACH);
3173         }
3174 
3175         buff->len += tmp;
3176         t1 = (struct tcphdr *)((char *)t1 +tmp);
3177 
3178         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3179         t1->seq = ntohl(sk->write_seq++);
3180         sk->sent_seq = sk->write_seq;
3181         buff->h.seq = sk->write_seq;
3182         t1->ack = 0;
3183         t1->window = 2;
3184         t1->res1=0;
3185         t1->res2=0;
3186         t1->rst = 0;
3187         t1->urg = 0;
3188         t1->psh = 0;
3189         t1->syn = 1;
3190         t1->urg_ptr = 0;
3191         t1->doff = 6;
3192         /* use 512 or whatever user asked for */
3193 
3194         if (sk->user_mss)
3195                 sk->mtu = sk->user_mss;
3196         else if(rt!=NULL && rt->rt_flags&RTF_MTU)
3197                 sk->mtu = rt->rt_mtu;
3198         else 
3199         {
3200 #ifdef SUBNETSARELOCAL
3201                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3202 #else
3203                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3204 #endif
3205                         sk->mtu = 576 - HEADER_SIZE;
3206                 else
3207                         sk->mtu = MAX_WINDOW;
3208         }
3209         /*
3210          *      but not bigger than device MTU 
3211          */
3212 
3213         if(sk->mtu <32)
3214                 sk->mtu = 32;   /* Sanity limit */
3215                 
3216         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3217         
3218         /*
3219          *      Put in the TCP options to say MTU. 
3220          */
3221 
3222         ptr = (unsigned char *)(t1+1);
3223         ptr[0] = 2;
3224         ptr[1] = 4;
3225         ptr[2] = (sk->mtu) >> 8;
3226         ptr[3] = (sk->mtu) & 0xff;
3227         tcp_send_check(t1, sk->saddr, sk->daddr,
3228                   sizeof(struct tcphdr) + 4, sk);
3229 
3230         /*
3231          *      This must go first otherwise a really quick response will get reset. 
3232          */
3233 
3234         sk->state = TCP_SYN_SENT;
3235 /*      sk->rtt = TCP_CONNECT_TIME;*/
3236         sk->rto = TCP_TIMEOUT_INIT;
3237         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3238         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3239 
3240         sk->prot->queue_xmit(sk, dev, buff, 0);  
3241         tcp_statistics.TcpActiveOpens++;
3242         tcp_statistics.TcpOutSegs++;
3243   
3244         release_sock(sk);
3245         return(0);
3246 }
3247 
3248 
3249 /* This functions checks to see if the tcp header is actually acceptable. */
3250 static int
3251 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3252              struct options *opt, unsigned long saddr, struct device *dev)
3253 {
3254         unsigned long next_seq;
3255 
3256         next_seq = len - 4*th->doff;
3257         if (th->fin)
3258                 next_seq++;
3259         /* if we have a zero window, we can't have any data in the packet.. */
3260         if (next_seq && !sk->window)
3261                 goto ignore_it;
3262         next_seq += th->seq;
3263 
3264         /*
3265          * This isn't quite right.  sk->acked_seq could be more recent
3266          * than sk->window.  This is however close enough.  We will accept
3267          * slightly more packets than we should, but it should not cause
3268          * problems unless someone is trying to forge packets.
3269          */
3270 
3271         /* have we already seen all of this packet? */
3272         if (!after(next_seq+1, sk->acked_seq))
3273                 goto ignore_it;
3274         /* or does it start beyond the window? */
3275         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3276                 goto ignore_it;
3277 
3278         /* ok, at least part of this packet would seem interesting.. */
3279         return 1;
3280 
3281 ignore_it:
3282         if (th->rst)
3283                 return 0;
3284 
3285         /*
3286          *      Send a reset if we get something not ours and we are
3287          *      unsynchronized. Note: We don't do anything to our end. We
3288          *      are just killing the bogus remote connection then we will
3289          *      connect again and it will work (with luck).
3290          */
3291          
3292         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3293                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3294                 return 1;
3295         }
3296 
3297         /* Try to resync things. */
3298         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3299         return 0;
3300 }
3301 
3302 
/*
 * tcp_rcv - handle one received TCP segment.
 *
 *   skb       buffer holding the segment; the TCP header is read from skb->h.th
 *   dev       device argument, handed on to tcp_reset/tcp_sequence/tcp_fin/
 *             tcp_conn_request
 *   opt       IP options, handed on to the helpers
 *   daddr,
 *   saddr     addresses of the segment (presumably destination and source as
 *             seen on the wire - confirm against the caller); used for the
 *             socket lookup, the checksum and any replies
 *   len       segment length as passed to tcp_check(); stored in skb->len
 *   redo      when zero, the checksum is verified, th->seq is byte-swapped,
 *             the skb bookkeeping fields are initialised and the socket is
 *             either claimed (sk->inuse = 1) or the skb is queued on
 *             sk->back_log.  When non-zero all of that is skipped
 *             (presumably the segment is being replayed from the backlog -
 *             TODO confirm).
 *   protocol  not referenced in this function
 *
 * Every path returns 0.  On the paths that got as far as claiming the
 * socket, the skb is either freed here or handed to tcp_data()/
 * tcp_conn_request(), and release_sock() is called before returning
 * (see the review notes below for the exceptions).
 */
3303 int
3304 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3305         unsigned long daddr, unsigned short len,
3306         unsigned long saddr, int redo, struct inet_protocol * protocol)
3307 {
3308   struct tcphdr *th;
3309   struct sock *sk;
3310 
3311   if (!skb) {
3312         return(0);
3313   }
3314 
3315   if (!dev) 
3316   {
3317         return(0);
3318   }
3319   
3320   tcp_statistics.TcpInSegs++;
3321   
3322   th = skb->h.th;
3323 
3324   /* Find the socket. */
3325   sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3326   /* If this socket has got a reset its to all intents and purposes 
3327      really dead */
3328   if (sk!=NULL && sk->zapped)
3329         sk=NULL;
3330 
3331   if (!redo) {
        /* First pass over this segment: a non-zero tcp_check() result means drop it. */
3332         if (tcp_check(th, len, saddr, daddr )) {
3333                 skb->sk = NULL;
3334                 kfree_skb(skb,FREE_READ);
3335                 /*
3336                  * We don't release the socket because it was
3337                  * never marked in use.
3338                  */
3339                 return(0);
3340         }
3341 
        /* From here on th->seq is kept in host byte order. */
3342         th->seq = ntohl(th->seq);
3343 
3344         /* See if we know about the socket. */
3345         if (sk == NULL) {
3346                 if (!th->rst)
3347                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3348                 skb->sk = NULL;
3349                 kfree_skb(skb, FREE_READ);
3350                 return(0);
3351         }
3352 
3353         skb->len = len;
3354         skb->sk = sk;
3355         skb->acked = 0;
3356         skb->used = 0;
3357         skb->free = 0;
        /* Note the swap: the skb records the addresses from our point of view. */
3358         skb->saddr = daddr;
3359         skb->daddr = saddr;
3360 
3361         /* We may need to add it to the backlog here. */
        /* Interrupts are off while sk->inuse is tested and set. */
3362         cli();
3363         if (sk->inuse) {
3364                 skb_queue_head(&sk->back_log, skb);
3365                 sti();
3366                 return(0);
3367         }
3368         sk->inuse = 1;
3369         sti();
3370   } else {
3371         if (!sk) {
3372                 return(0);
3373         }
3374   }
3375 
  /*
   * NOTE(review): this return neither frees skb nor calls release_sock(),
   * although sk->inuse was set above on the !redo path - confirm whether
   * sk->prot can really be NULL here.
   */
3376   if (!sk->prot) {
3377         return(0);
3378   }
3379 
3380   /* Charge the memory to the socket. */
  /* The segment is dropped if it would take rmem_alloc to or past rcvbuf. */
3381   if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
3382         skb->sk = NULL;
3383         kfree_skb(skb, FREE_READ);
3384         release_sock(sk);
3385         return(0);
3386   }
3387   sk->rmem_alloc += skb->mem_len;
3388 
3389 
3390   /* Now deal with it. */
3391   switch(sk->state) {
3392         /*
3393          * This should close the system down if it's waiting
3394          * for an ack that is never going to be sent.
3395          */
3396         case TCP_LAST_ACK:
3397                 if (th->rst) {
3398                         sk->zapped=1;
3399                         sk->err = ECONNRESET;
3400                         sk->state = TCP_CLOSE;
3401                         sk->shutdown = SHUTDOWN_MASK;
3402                         if (!sk->dead) {
3403                                 sk->state_change(sk);
3404                         }
3405                         kfree_skb(skb, FREE_READ);
3406                         release_sock(sk);
3407                         return(0);
3408                 }
3409 
                /* No RST: fall through and treat like the synchronised states. */
3410         case TCP_ESTABLISHED:
3411         case TCP_CLOSE_WAIT:
3412         case TCP_CLOSING:
3413         case TCP_FIN_WAIT1:
3414         case TCP_FIN_WAIT2:
3415         case TCP_TIME_WAIT:
                /* Order below: sequence check, RST, SYN, ACK, urgent, data, FIN. */
3416                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3417                         kfree_skb(skb, FREE_READ);
3418                         release_sock(sk);
3419                         return(0);
3420                 }
3421 
3422                 if (th->rst) 
3423                 {
3424                         tcp_statistics.TcpEstabResets++;
3425                         tcp_statistics.TcpCurrEstab--;
3426                         sk->zapped=1;
3427                         /* This means the thing should really be closed. */
3428                         sk->err = ECONNRESET;
3429 
3430                         if (sk->state == TCP_CLOSE_WAIT) 
3431                         {
3432                                 sk->err = EPIPE;
3433                         }
3434 
3435                         /*
3436                          * A reset with a fin just means that
3437                          * the data was not all read.
3438                          */
3439                         sk->state = TCP_CLOSE;
3440                         sk->shutdown = SHUTDOWN_MASK;
3441                         if (!sk->dead) 
3442                         {
3443                                 sk->state_change(sk);
3444                         }
3445                         kfree_skb(skb, FREE_READ);
3446                         release_sock(sk);
3447                         return(0);
3448                 }
                /* A SYN on a synchronised connection: close our end and reset the peer. */
3449                 if (th->syn) 
3450                 {
3451                         tcp_statistics.TcpCurrEstab--;
3452                         tcp_statistics.TcpEstabResets++;
3453                         sk->err = ECONNRESET;
3454                         sk->state = TCP_CLOSE;
3455                         sk->shutdown = SHUTDOWN_MASK;
3456                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3457                         if (!sk->dead) {
3458                                 sk->state_change(sk);
3459                         }
3460                         kfree_skb(skb, FREE_READ);
3461                         release_sock(sk);
3462                         return(0);
3463                 }
3464 
                /* A zero return from tcp_ack() means: drop the segment. */
3465                 if (th->ack && !tcp_ack(sk, th, saddr, len)) {
3466                         kfree_skb(skb, FREE_READ);
3467                         release_sock(sk);
3468                         return(0);
3469                 }
3470 
                /* Non-zero from tcp_urg()/tcp_data()/tcp_fin() means: free the skb here. */
3471                 if (tcp_urg(sk, th, saddr, len)) {
3472                         kfree_skb(skb, FREE_READ);
3473                         release_sock(sk);
3474                         return(0);
3475                 }
3476 
3477                 if (tcp_data(skb, sk, saddr, len)) {
3478                         kfree_skb(skb, FREE_READ);
3479                         release_sock(sk);
3480                         return(0);
3481                 }
3482 
3483                 /* Moved: you must do data then fin bit */
3484                 if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
3485                         kfree_skb(skb, FREE_READ);
3486                         release_sock(sk);
3487                         return(0);
3488                 }
3489 
3490                 release_sock(sk);
3491                 return(0);
3492 
3493         case TCP_CLOSE:
                /* Silently drop for a dead socket or one that has a peer address set. */
3494                 if (sk->dead || sk->daddr) {
3495                         kfree_skb(skb, FREE_READ);
3496                         release_sock(sk);
3497                         return(0);
3498                 }
3499 
3500                 if (!th->rst) {
3501                         if (!th->ack)
3502                                 th->ack_seq = 0;
3503                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3504                 }
3505                 kfree_skb(skb, FREE_READ);
3506                 release_sock(sk);
3507                 return(0);
3508 
3509         case TCP_LISTEN:
                /* RST is ignored, an ACK is answered with a reset, a SYN starts a connection. */
3510                 if (th->rst) {
3511                         kfree_skb(skb, FREE_READ);
3512                         release_sock(sk);
3513                         return(0);
3514                 }
3515                 if (th->ack) {
3516                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3517                         kfree_skb(skb, FREE_READ);
3518                         release_sock(sk);
3519                         return(0);
3520                 }
3521 
3522                 if (th->syn) 
3523                 {
3524                         /*
3525                          * Now we just put the whole thing including
3526                          * the header and saddr, and protocol pointer
3527                          * into the buffer.  We can't respond until the
3528                          * user tells us to accept the connection.
3529                          */
                        /* The skb is not freed here; it is handed to tcp_conn_request(). */
3530                         tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
3531                         release_sock(sk);
3532                         return(0);
3533                 }
3534 
3535                 kfree_skb(skb, FREE_READ);
3536                 release_sock(sk);
3537                 return(0);
3538 
3539         case TCP_SYN_RECV:
3540                 if (th->syn) {
3541                         /* Probably a retransmitted syn */
3542                         kfree_skb(skb, FREE_READ);
3543                         release_sock(sk);
3544                         return(0);
3545                 }
3546 
3547 
                /* Not a SYN: fall through to the sequence check under default. */
3548         default:
3549                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3550                 {
3551                         kfree_skb(skb, FREE_READ);
3552                         release_sock(sk);
3553                         return(0);
3554                 }
3555 
                /*
                 * Fall through.  TCP_SYN_SENT enters here directly, i.e.
                 * without the tcp_sequence() check above.
                 */
3556         case TCP_SYN_SENT:
3557                 if (th->rst) 
3558                 {
3559                         tcp_statistics.TcpAttemptFails++;
3560                         sk->err = ECONNREFUSED;
3561                         sk->state = TCP_CLOSE;
3562                         sk->shutdown = SHUTDOWN_MASK;
3563                         sk->zapped = 1;
3564                         if (!sk->dead) 
3565                         {
3566                                 sk->state_change(sk);
3567                         }
3568                         kfree_skb(skb, FREE_READ);
3569                         release_sock(sk);
3570                         return(0);
3571                 }
                /* Without an ACK the segment is dropped; a bare SYN only switches the state to SYN_RECV. */
3572                 if (!th->ack) 
3573                 {
3574                         if (th->syn) 
3575                         {
3576                                 sk->state = TCP_SYN_RECV;
3577                         }
3578 
3579                         kfree_skb(skb, FREE_READ);
3580                         release_sock(sk);
3581                         return(0);
3582                 }
3583 
                /*
                 * Second dispatch on the state.  Only SYN_SENT and SYN_RECV
                 * have cases here; any other state skips this switch and
                 * continues with the urgent/data/FIN handling after it.
                 */
3584                 switch(sk->state) 
3585                 {
3586                         case TCP_SYN_SENT:
3587                                 if (!tcp_ack(sk, th, saddr, len)) 
3588                                 {
3589                                         tcp_statistics.TcpAttemptFails++;
3590                                         tcp_reset(daddr, saddr, th,
3591                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3592                                         kfree_skb(skb, FREE_READ);
3593                                         release_sock(sk);
3594                                         return(0);
3595                                 }
3596 
3597                                 /*
3598                                  * If the syn bit is also set, switch to
3599                                  * tcp_syn_recv, and then to established.
3600                                  */
3601                                 if (!th->syn) 
3602                                 {
3603                                         kfree_skb(skb, FREE_READ);
3604                                         release_sock(sk);
3605                                         return(0);
3606                                 }
3607 
3608                                 /* Ack the syn and fall through. */
3609                                 sk->acked_seq = th->seq+1;
3610                                 sk->fin_seq = th->seq;
3611                                 tcp_send_ack(sk->sent_seq, th->seq+1,
3612                                                         sk, th, sk->daddr);
3613         
3614                         case TCP_SYN_RECV:
                                /* When reached by fall-through from SYN_SENT, tcp_ack() runs a second time on this segment. */
3615                                 if (!tcp_ack(sk, th, saddr, len)) 
3616                                 {
3617                                         tcp_statistics.TcpAttemptFails++;
3618                                         tcp_reset(daddr, saddr, th,
3619                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
3620                                         kfree_skb(skb, FREE_READ);
3621                                         release_sock(sk);
3622                                         return(0);
3623                                 }
3624 
3625                                 tcp_statistics.TcpCurrEstab++;
3626                                 sk->state = TCP_ESTABLISHED;
3627 
3628                                 /*
3629                                  * Now we need to finish filling out
3630                                  * some of the tcp header.
3631                                  */
3632                                 /* We need to check for mtu info. */
3633                                 tcp_options(sk, th);
3634                                 sk->dummy_th.dest = th->source;
3635                                 sk->copied_seq = sk->acked_seq-1;
3636                                 if (!sk->dead) {
3637                                         sk->state_change(sk);
3638                                 }
3639 
3640                                 /*
3641                                  * We've already processed his first
3642                                  * ack.  In just about all cases that
3643                                  * will have set max_window.  This is
3644                                  * to protect us against the possibility
3645                                  * that the initial window he sent was 0.
3646                                  * This must occur after tcp_options, which
3647                                  * sets sk->mtu.
3648                                  */
3649                                 if (sk->max_window == 0) {
3650                                   sk->max_window = 32;
3651                                   sk->mss = min(sk->max_window, sk->mtu);
3652                                 }
3653 
3654                                 /*
3655                                  * Now process the rest like we were
3656                                  * already in the established state.
3657                                  */
3658                                 if (th->urg) {
3659                                         if (tcp_urg(sk, th, saddr, len)) { 
3660                                                 kfree_skb(skb, FREE_READ);
3661                                                 release_sock(sk);
3662                                                 return(0);
3663                                         }
3664                         }
                        /*
                         * The brace above closes `if (th->urg)`.  Despite the
                         * indentation, the statements below still belong to the
                         * inner case TCP_SYN_RECV.
                         *
                         * NOTE(review): unlike every other tcp_data() call in
                         * this function, a non-zero result frees the skb but
                         * does not return, so th->fin is read (and tcp_fin() may
                         * be called with skb) after the free - confirm whether
                         * tcp_data() can return non-zero on this path.
                         */
3665                         if (tcp_data(skb, sk, saddr, len))
3666                                                 kfree_skb(skb, FREE_READ);
3667 
3668                         if (th->fin) tcp_fin(skb, sk, th, saddr, dev);
3669                         release_sock(sk);
3670                         return(0);
3671                 }
3672 
                /* End of the inner switch: only states other than SYN_SENT/SYN_RECV get here. */
3673                 if (th->urg) {
3674                         if (tcp_urg(sk, th, saddr, len)) {
3675                                 kfree_skb(skb, FREE_READ);
3676                                 release_sock(sk);
3677                                 return(0);
3678                         }
3679                 }
3680 
3681                 if (tcp_data(skb, sk, saddr, len)) {
3682                         kfree_skb(skb, FREE_READ);
3683                         release_sock(sk);
3684                         return(0);
3685                 }
3686 
3687                 if (!th->fin) {
3688                         release_sock(sk);
3689                         return(0);
3690                 }
                /* The result of tcp_fin() is ignored on this path. */
3691                 tcp_fin(skb, sk, th, saddr, dev);
3692                 release_sock(sk);
3693                 return(0);
3694         }
3695 }
3696 
3697 
3698 /*
3699  * This routine sends a packet with an out of date sequence
3700  * number. It assumes the other end will try to ack it.
3701  */
3702 
3703 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3704 {
3705         struct sk_buff *buff;
3706         struct tcphdr *t1;
3707         struct device *dev=NULL;
3708         int tmp;
3709 
3710         if (sk->zapped)
3711                 return; /* Afer a valid reset we can send no more */
3712 
3713         /*
3714          * Write data can still be transmitted/retransmitted in the
3715          * following states.  If any other state is encountered, return.
3716          */
3717 
3718         if (sk->state != TCP_ESTABLISHED && 
3719             sk->state != TCP_CLOSE_WAIT &&
3720             sk->state != TCP_FIN_WAIT1 && 
3721             sk->state != TCP_LAST_ACK &&
3722             sk->state != TCP_CLOSING
3723         ) {
3724                 return;
3725         }
3726 
3727         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
3728         if (buff == NULL) 
3729                 return;
3730 
3731         buff->len = sizeof(struct tcphdr);
3732         buff->free = 1;
3733         buff->sk = sk;
3734         buff->localroute = sk->localroute;
3735 
3736         t1 = (struct tcphdr *) buff->data;
3737 
3738         /* Put in the IP header and routing stuff. */
3739         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3740                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
3741         if (tmp < 0) 
3742         {
3743                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3744                 return;
3745         }
3746 
3747         buff->len += tmp;
3748         t1 = (struct tcphdr *)((char *)t1 +tmp);
3749 
3750         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
3751 
3752         /*
3753          * Use a previous sequence.
3754          * This should cause the other end to send an ack.
3755          */
3756         t1->seq = htonl(sk->sent_seq-1);
3757         t1->ack = 1; 
3758         t1->res1= 0;
3759         t1->res2= 0;
3760         t1->rst = 0;
3761         t1->urg = 0;
3762         t1->psh = 0;
3763         t1->fin = 0;
3764         t1->syn = 0;
3765         t1->ack_seq = ntohl(sk->acked_seq);
3766         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3767         t1->doff = sizeof(*t1)/4;
3768         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
3769 
3770          /*     Send it and free it.
3771           *     This will prevent the timer from automatically being restarted.
3772           */
3773         sk->prot->queue_xmit(sk, dev, buff, 1);
3774         tcp_statistics.TcpOutSegs++;
3775 }
3776 
3777 void
3778 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3779 {
3780         if (sk->zapped)
3781                 return;         /* Afer a valid reset we can send no more */
3782 
3783         tcp_write_wakeup(sk);
3784 
3785         sk->backoff++;
3786         sk->rto = min(sk->rto << 1, 120*HZ);
3787         reset_timer (sk, TIME_PROBE0, sk->rto);
3788         sk->retransmits++;
3789         sk->prot->retransmits ++;
3790 }
3791 
3792 /*
3793  *      Socket option code for TCP. 
3794  */
3795   
3796 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3797 {
3798         int val,err;
3799 
3800         if(level!=SOL_TCP)
3801                 return ip_setsockopt(sk,level,optname,optval,optlen);
3802 
3803         if (optval == NULL) 
3804                 return(-EINVAL);
3805 
3806         err=verify_area(VERIFY_READ, optval, sizeof(int));
3807         if(err)
3808                 return err;
3809         
3810         val = get_fs_long((unsigned long *)optval);
3811 
3812         switch(optname)
3813         {
3814                 case TCP_MAXSEG:
3815 /*                      if(val<200||val>2048 || val>sk->mtu) */
3816 /*
3817  * values greater than interface MTU won't take effect.  however at
3818  * the point when this call is done we typically don't yet know
3819  * which interface is going to be used
3820  */
3821                         if(val<1||val>MAX_WINDOW)
3822                                 return -EINVAL;
3823                         sk->user_mss=val;
3824                         return 0;
3825                 case TCP_NODELAY:
3826                         sk->nonagle=(val==0)?0:1;
3827                         return 0;
3828                 default:
3829                         return(-ENOPROTOOPT);
3830         }
3831 }
3832 
3833 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3834 {
3835         int val,err;
3836 
3837         if(level!=SOL_TCP)
3838                 return ip_getsockopt(sk,level,optname,optval,optlen);
3839                         
3840         switch(optname)
3841         {
3842                 case TCP_MAXSEG:
3843                         val=sk->user_mss;
3844                         break;
3845                 case TCP_NODELAY:
3846                         val=sk->nonagle;        /* Until Johannes stuff is in */
3847                         break;
3848                 default:
3849                         return(-ENOPROTOOPT);
3850         }
3851         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3852         if(err)
3853                 return err;
3854         put_fs_long(sizeof(int),(unsigned long *) optlen);
3855 
3856         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3857         if(err)
3858                 return err;
3859         put_fs_long(val,(unsigned long *)optval);
3860 
3861         return(0);
3862 }       
3863 
3864 
/*
 * Protocol operations table for TCP.
 *
 * This is a positional initializer: each entry fills the next member of
 * struct proto, which is declared elsewhere, so the order here must match
 * that declaration exactly.  The entries are the generic sock_* buffer
 * helpers, the tcp_* entry points defined in this file, and the IP layer's
 * ip_build_header / ip_queue_xmit.
 *
 * NOTE(review): the meaning of the NULL, 128, 0 and {NULL,} slots cannot be
 * read from this file - confirm against the struct proto declaration.
 */
3865 struct proto tcp_prot = {
3866   sock_wmalloc,
3867   sock_rmalloc,
3868   sock_wfree,
3869   sock_rfree,
3870   sock_rspace,
3871   sock_wspace,
3872   tcp_close,
3873   tcp_read,
3874   tcp_write,
3875   tcp_sendto,
3876   tcp_recvfrom,
3877   ip_build_header,
3878   tcp_connect,
3879   tcp_accept,
3880   ip_queue_xmit,
3881   tcp_retransmit,
3882   tcp_write_wakeup,
3883   tcp_read_wakeup,
3884   tcp_rcv,
3885   tcp_select,
3886   tcp_ioctl,
3887   NULL,
3888   tcp_shutdown,
3889   tcp_setsockopt,
3890   tcp_getsockopt,
3891   128,
3892   0,
3893   {NULL,},
3894   "TCP"
3895 };

/* [previous][next][first][last][top][bottom][index][help] */