root/net/inet/tcp.c


DEFINITIONS

This source file includes the following definitions.
  1. min
  2. print_th
  3. get_firstr
  4. diff
  5. tcp_select_window
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. tcp_send_skb
  15. tcp_dequeue_partial
  16. tcp_send_partial
  17. tcp_enqueue_partial
  18. tcp_send_ack
  19. tcp_build_header
  20. tcp_write
  21. tcp_sendto
  22. tcp_read_wakeup
  23. cleanup_rbuf
  24. tcp_read_urg
  25. tcp_read
  26. tcp_shutdown
  27. tcp_recvfrom
  28. tcp_reset
  29. tcp_options
  30. default_mask
  31. tcp_conn_request
  32. tcp_close
  33. tcp_write_xmit
  34. sort_send
  35. tcp_ack
  36. tcp_data
  37. tcp_check_urg
  38. tcp_urg
  39. tcp_fin
  40. tcp_accept
  41. tcp_connect
  42. tcp_sequence
  43. tcp_rcv
  44. tcp_write_wakeup
  45. tcp_send_probe0
  46. tcp_setsockopt
  47. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *
  16  * Fixes:       
  17  *              Alan Cox        :       Numerous verify_area() calls
  18  *              Alan Cox        :       Set the ACK bit on a reset
  19  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  20  *                                      and was trying to connect (tcp_err()).
  21  *              Alan Cox        :       All icmp error handling was broken
  22  *                                      pointers passed where wrong and the
  23  *                                      socket was looked up backwards. Nobody
  24  *                                      tested any icmp error code obviously.
  25  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  26  *                                      on errors. select behaves and the icmp error race
  27  *                                      has gone by moving it into sock.c
  28  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  29  *                                      packets for unknown sockets.
  30  *              Alan Cox        :       tcp option processing.
  31  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  32  *              Herp Rosmanith  :       More reset fixes
  33  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  34  *                                      any kind of RST is right out.
  35  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  36  *                                      otherwise odd bits of prattle escape still
  37  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  38  *                                      LAN workplace lockups.
  39  *              Alan Cox        :       Some tidyups using the new skb list facilities
  40  *              Alan Cox        :       sk->keepopen now seems to work
  41  *              Alan Cox        :       Pulls options out correctly on accepts
  42  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  43  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  44  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
   45  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  46  *              Alan Cox        :       Removed incorrect check for 20 * psh
  47  *      Michael O'Reilly        :       ack < copied bug fix.
  48  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  49  *              Alan Cox        :       FIN with no memory -> CRASH
  50  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  51  *              Alan Cox        :       Added TCP options (SOL_TCP)
  52  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  53  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  54  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  55  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  56  *              Alan Cox        :       Put in missing check for SYN bit.
  57  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  58  *                                      window non shrink trick.
  59  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  60  *              Charles Hedrick :       TCP fixes
  61  *              Toomas Tamm     :       TCP window fixes
  62  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  63  *              Charles Hedrick :       Window fix
  64  *              Linus           :       Rewrote tcp_read() and URG handling
  65  *                                      completely
  66  *
  67  *
  68  * To Fix:
  69  *                      Possibly a problem with accept(). BSD accept never fails after
  70  *              it causes a select. Linux can - given the official select semantics I
   71  *              feel that _really_ it's the BSD network programs that are bust (notably
  72  *              inetd, which hangs occasionally because of this).
  73  *                      Add VJ Fastrecovery algorithm ?
  74  *                      Protocol closedown badly messed up.
   75  *                      Incompatibility with spider ports (tcp hangs on that 
  76  *                      socket occasionally).
  77  *              MSG_PEEK and read on same socket at once can cause crashes.
  78  *
  79  *              This program is free software; you can redistribute it and/or
  80  *              modify it under the terms of the GNU General Public License
  81  *              as published by the Free Software Foundation; either version
  82  *              2 of the License, or(at your option) any later version.
  83  */
  84 #include <linux/types.h>
  85 #include <linux/sched.h>
  86 #include <linux/mm.h>
  87 #include <linux/string.h>
  88 #include <linux/socket.h>
  89 #include <linux/sockios.h>
  90 #include <linux/termios.h>
  91 #include <linux/in.h>
  92 #include <linux/fcntl.h>
  93 #include "inet.h"
  94 #include "dev.h"
  95 #include "ip.h"
  96 #include "protocol.h"
  97 #include "icmp.h"
  98 #include "tcp.h"
  99 #include "skbuff.h"
 100 #include "sock.h"
 101 #include "arp.h"
 102 #include <linux/errno.h>
 103 #include <linux/timer.h>
 104 #include <asm/system.h>
 105 #include <asm/segment.h>
 106 #include <linux/mm.h>
 107 
 108 #define SEQ_TICK 3
 109 unsigned long seq_offset;
 110 #define SUBNETSARELOCAL
 111 
 112 static __inline__ int 
 113 min(unsigned int a, unsigned int b)
 114 {
 115   if (a < b) return(a);
 116   return(b);
 117 }
 118 
 119 
 120 void
 121 print_th(struct tcphdr *th)
 122 {
 123   unsigned char *ptr;
 124 
 125   if (inet_debug != DBG_TCP) return;
 126 
 127   printk("TCP header:\n");
 128   ptr =(unsigned char *)(th + 1);
 129   printk("    source=%d, dest=%d, seq =%ld, ack_seq = %ld\n",
 130         ntohs(th->source), ntohs(th->dest),
 131         ntohl(th->seq), ntohl(th->ack_seq));
 132   printk("    fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n",
 133         th->fin, th->syn, th->rst, th->psh, th->ack,
 134         th->urg, th->res1, th->res2);
 135   printk("    window = %d, check = %d urg_ptr = %d\n",
 136         ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr));
 137   printk("    doff = %d\n", th->doff);
 138   printk("    options = %d %d %d %d\n", ptr[0], ptr[1], ptr[2], ptr[3]);
 139  }
 140 
 141 
 142 
 143 /* This routine grabs the first thing off of a rcv queue. */
 144 static struct sk_buff *
 145 get_firstr(struct sock *sk)
 146 {
 147   return skb_dequeue(&sk->rqueue);
 148 }
 149 
 150 /*
 151  *      Difference between two values in tcp ack terms.
 152  */
 153 
 154 static long
 155 diff(unsigned long seq1, unsigned long seq2)
 156 {
 157   long d;
 158 
 159   d = seq1 - seq2;
 160   if (d > 0) return(d);
 161 
  162   /* d is <= 0 here: ~d+1 is two's-complement negation, i.e. -d. */
  163   return(~d+1);
 164 }
 165 
  166 /* This routine picks a TCP window for a socket based on
 167    the following constraints
 168    
 169    1. The window can never be shrunk once it is offered (RFC 793)
 170    2. We limit memory per socket
 171    
 172    For now we use NET2E3's heuristic of offering half the memory
 173    we have handy. All is not as bad as this seems however because
 174    of two things. Firstly we will bin packets even within the window
 175    in order to get the data we are waiting for into the memory limit.
 176    Secondly we bin common duplicate forms at receive time
 177 
 178    Better heuristics welcome
 179 */
 180    
 181 static int tcp_select_window(struct sock *sk)
 182 {
 183         int new_window = sk->prot->rspace(sk);
 184 
 185 /*
 186  * two things are going on here.  First, we don't ever offer a
 187  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 188  * receiver side of SWS as specified in RFC1122.
 189  * Second, we always give them at least the window they
 190  * had before, in order to avoid retracting window.  This
 191  * is technically allowed, but RFC1122 advises against it and
 192  * in practice it causes trouble.
 193  */
 194         if (new_window < min(sk->mss, MAX_WINDOW/2) ||
 195             new_window < sk->window)
 196           return(sk->window);
 197         return(new_window);
 198 }
 199 
 200 /* Enter the time wait state. */
 201 
 202 static void tcp_time_wait(struct sock *sk)
 203 {
 204   sk->state = TCP_TIME_WAIT;
 205   sk->shutdown = SHUTDOWN_MASK;
 206   if (!sk->dead)
 207         sk->state_change(sk);
 208   reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 209 }
 210 
 211 /*
  212  *      A timer event has triggered a tcp retransmit timeout. The
 213  *      socket xmit queue is ready and set up to send. Because
 214  *      the ack receive code keeps the queue straight we do
 215  *      nothing clever here.
 216  */
 217 
 218 static void
 219 tcp_retransmit(struct sock *sk, int all)
 220 {
 221   if (all) {
 222         ip_retransmit(sk, all);
 223         return;
 224   }
 225 
 226   sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 227   /* sk->ssthresh in theory can be zero.  I guess that's OK */
 228   sk->cong_count = 0;
 229 
 230   sk->cong_window = 1;
 231 
 232   /* Do the actual retransmit. */
 233   ip_retransmit(sk, all);
 234 }
 235 
 236 
 237 /*
 238  * This routine is called by the ICMP module when it gets some
 239  * sort of error condition.  If err < 0 then the socket should
 240  * be closed and the error returned to the user.  If err > 0
 241  * it's just the icmp type << 8 | icmp code.  After adjustment
 242  * header points to the first 8 bytes of the tcp header.  We need
 243  * to find the appropriate port.
 244  */
 245 void
 246 tcp_err(int err, unsigned char *header, unsigned long daddr,
 247         unsigned long saddr, struct inet_protocol *protocol)
 248 {
 249   struct tcphdr *th;
 250   struct sock *sk;
 251   struct iphdr *iph=(struct iphdr *)header;
 252   
 253   header+=4*iph->ihl;
 254    
 255   DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n",
 256                                         err, header, daddr, saddr, protocol));
 257 
 258   th =(struct tcphdr *)header;
 259   sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 260   print_th(th);
 261 
 262   if (sk == NULL) return;
 263   
 264   if(err<0)
 265   {
 266         sk->err = -err;
 267         sk->error_report(sk);
 268         return;
 269   }
 270 
 271   if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
 272         /*
 273          * FIXME:
 274          * For now we will just trigger a linear backoff.
 275          * The slow start code should cause a real backoff here.
 276          */
 277         if (sk->cong_window > 4) sk->cong_window--;
 278         return;
 279   }
 280 
 281   DPRINTF((DBG_TCP, "TCP: icmp_err got error\n"));
 282   sk->err = icmp_err_convert[err & 0xff].errno;
 283 
 284   /*
 285    * If we've already connected we will keep trying
 286    * until we time out, or the user gives up.
 287    */
 288   if (icmp_err_convert[err & 0xff].fatal) {
 289         if (sk->state == TCP_SYN_SENT) {
 290                 sk->state = TCP_CLOSE;
 291                 sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 292         }
 293   }
 294   return;
 295 }
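
/*
 * Illustrative sketch (an assumption for clarity, not part of this file):
 * unpacking the "icmp type << 8 | icmp code" encoding that tcp_err()
 * receives when err > 0, as used by the ICMP_SOURCE_QUENCH test above.
 */
static void decode_icmp_err(int err, int *type, int *code)
{
        *type = (err >> 8) & 0xff;      /* e.g. ICMP_SOURCE_QUENCH */
        *code = err & 0xff;             /* index into icmp_err_convert[] */
}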
 296 
 297 
 298 /*
 299  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 300  *      in the received data queue (ie a frame missing that needs sending to us)
 301  */
 302 
 303 static int
 304 tcp_readable(struct sock *sk)
 305 {
 306   unsigned long counted;
 307   unsigned long amount;
 308   struct sk_buff *skb;
 309   int count=0;
 310   int sum;
 311   unsigned long flags;
 312 
 313   DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk));
 314   if(sk && sk->debug)
 315         printk("tcp_readable: %p - ",sk);
 316 
 317   if (sk == NULL || skb_peek(&sk->rqueue) == NULL)      /* Empty sockets are easy! */
 318   {
 319         if(sk && sk->debug) 
 320                 printk("empty\n");
 321         return(0);
 322   }
 323   
 324   counted = sk->copied_seq+1;   /* Where we are at the moment */
 325   amount = 0;
 326   
 327   save_flags(flags);            /* So nobody adds things at the wrong moment */
 328   cli();
 329   skb =(struct sk_buff *)sk->rqueue;
 330 
 331   /* Do until a push or until we are out of data. */
 332   do {
 333         count++;
 334 #ifdef OLD      
 335         /* This is wrong: It breaks Chameleon amongst other stacks */
 336         if (count > 20) {
 337                 restore_flags(flags);
 338                 DPRINTF((DBG_TCP, "tcp_readable, more than 20 packets without a psh\n"));
 339                 printk("tcp_read: possible read_queue corruption.\n");
 340                 return(amount);
 341         }
 342 #endif  
  343         if (before(counted, skb->h.th->seq))    /* Found a hole so stop here */
 344                 break;
 345         sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 346         if (skb->h.th->syn)
 347                 sum++;
 348         if (sum >= 0) {                                 /* Add it up, move on */
 349                 amount += sum;
 350                 if (skb->h.th->syn) amount--;
 351                 counted += sum;
 352         }
 353         if (amount && skb->h.th->psh) break;
 354         skb =(struct sk_buff *)skb->next;               /* Move along */
 355   } while(skb != sk->rqueue);
 356   if (amount && !sk->urginline && sk->urg_data &&
 357       (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 358         amount--;               /* don't count urg data */
 359   restore_flags(flags);
 360   DPRINTF((DBG_TCP, "tcp readable returning %d bytes\n", amount));
 361   if(sk->debug)
 362         printk("got %lu bytes.\n",amount);
 363   return(amount);
 364 }
 365 
 366 
 367 /*
 368  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 369  *      listening socket has a receive queue of sockets to accept.
 370  */
 371 
 372 static int
 373 tcp_select(struct sock *sk, int sel_type, select_table *wait)
 374 {
 375   DPRINTF((DBG_TCP, "tcp_select(sk=%X, sel_type = %d, wait = %X)\n",
 376                                                 sk, sel_type, wait));
 377 
 378   sk->inuse = 1;
 379   switch(sel_type) {
 380         case SEL_IN:
 381                 if(sk->debug)
 382                         printk("select in");
 383                 select_wait(sk->sleep, wait);
 384                 if(sk->debug)
 385                         printk("-select out");
 386                 if (skb_peek(&sk->rqueue) != NULL) {
 387                         if (sk->state == TCP_LISTEN || tcp_readable(sk)) {
 388                                 release_sock(sk);
 389                                 if(sk->debug)
 390                                         printk("-select ok data\n");
 391                                 return(1);
 392                         }
 393                 }
 394                 if (sk->err != 0)       /* Receiver error */
 395                 {
 396                         release_sock(sk);
 397                         if(sk->debug)
 398                                 printk("-select ok error");
 399                         return(1);
 400                 }
 401                 if (sk->shutdown & RCV_SHUTDOWN) {
 402                         release_sock(sk);
 403                         if(sk->debug)
 404                                 printk("-select ok down\n");
 405                         return(1);
 406                 } else {
 407                         release_sock(sk);
 408                         if(sk->debug)
 409                                 printk("-select fail\n");
 410                         return(0);
 411                 }
 412         case SEL_OUT:
 413                 select_wait(sk->sleep, wait);
 414                 if (sk->shutdown & SEND_SHUTDOWN) {
 415                         DPRINTF((DBG_TCP,
 416                                 "write select on shutdown socket.\n"));
 417 
 418                         /* FIXME: should this return an error? */
 419                         release_sock(sk);
 420                         return(0);
 421                 }
 422 
 423                 /*
 424                  * FIXME:
 425                  * Hack so it will probably be able to write
 426                  * something if it says it's ok to write.
 427                  */
 428                 if (sk->prot->wspace(sk) >= sk->mss) {
 429                         release_sock(sk);
 430                         /* This should cause connect to work ok. */
 431                         if (sk->state == TCP_SYN_RECV ||
 432                             sk->state == TCP_SYN_SENT) return(0);
 433                         return(1);
 434                 }
 435                 DPRINTF((DBG_TCP,
 436                         "tcp_select: sleeping on write sk->wmem_alloc = %d, "
 437                         "sk->packets_out = %d\n"
 438                         "sk->wback = %X, sk->wfront = %X\n"
 439                         "sk->send_seq = %u, sk->window_seq=%u\n", 
 440                                 sk->wmem_alloc, sk->packets_out,
 441                                 sk->wback, sk->wfront,
 442                                 sk->send_seq, sk->window_seq));
 443 
 444                 release_sock(sk);
 445                 return(0);
 446         case SEL_EX:
 447                 select_wait(sk->sleep,wait);
 448                 if (sk->err || sk->urg_data) {
 449                         release_sock(sk);
 450                         return(1);
 451                 }
 452                 release_sock(sk);
 453                 return(0);
 454   }
 455 
 456   release_sock(sk);
 457   return(0);
 458 }
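
/*
 * Illustrative user-space sketch (an assumption, not part of this file): the
 * SEL_IN case above is what lets select() report a listening socket readable
 * once a completed connection is queued, after which accept() is called.
 * Note the "To Fix" comment at the top: on Linux that accept() may still fail.
 */
#include <stddef.h>
#include <sys/select.h>
#include <sys/socket.h>

static int wait_and_accept(int listen_fd)
{
        fd_set rfds;

        FD_ZERO(&rfds);
        FD_SET(listen_fd, &rfds);
        if (select(listen_fd + 1, &rfds, NULL, NULL, NULL) < 0)
                return -1;
        return accept(listen_fd, NULL, NULL);
}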
 459 
 460 
 461 int
 462 tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 463 {
 464   int err;
 465   DPRINTF((DBG_TCP, "tcp_ioctl(sk=%X, cmd = %d, arg=%X)\n", sk, cmd, arg));
 466   switch(cmd) {
 467         case DDIOCSDBG:
 468                 return(dbg_ioctl((void *) arg, DBG_TCP));
 469 
 470         case TIOCINQ:
 471 #ifdef FIXME    /* FIXME: */
 472         case FIONREAD:
 473 #endif
 474                 {
 475                         unsigned long amount;
 476 
 477                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 478 
 479                         sk->inuse = 1;
 480                         amount = tcp_readable(sk);
 481                         release_sock(sk);
 482                         DPRINTF((DBG_TCP, "returning %d\n", amount));
 483                         err=verify_area(VERIFY_WRITE,(void *)arg,
 484                                                    sizeof(unsigned long));
 485                         if(err)
 486                                 return err;
 487                         put_fs_long(amount,(unsigned long *)arg);
 488                         return(0);
 489                 }
 490         case SIOCATMARK:
 491                 {
 492                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 493 
 494                         err = verify_area(VERIFY_WRITE,(void *) arg,
 495                                                   sizeof(unsigned long));
 496                         if (err)
 497                                 return err;
 498                         put_fs_long(answ,(int *) arg);
 499                         return(0);
 500                 }
 501         case TIOCOUTQ:
 502                 {
 503                         unsigned long amount;
 504 
 505                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 506                         amount = sk->prot->wspace(sk);
 507                         err=verify_area(VERIFY_WRITE,(void *)arg,
 508                                                    sizeof(unsigned long));
 509                         if(err)
 510                                 return err;
 511                         put_fs_long(amount,(unsigned long *)arg);
 512                         return(0);
 513                 }
 514         default:
 515                 return(-EINVAL);
 516   }
 517 }
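
/*
 * Illustrative user-space sketch (an assumption, not part of this file):
 * the queries this handler answers on a connected TCP socket.  TIOCINQ
 * reports what tcp_readable() counts, TIOCOUTQ the free write space from
 * wspace(), and SIOCATMARK whether the next byte is the urgent mark.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

static void show_queue_state(int fd)
{
        int pending = 0, wspace = 0, atmark = 0;

        ioctl(fd, TIOCINQ, &pending);
        ioctl(fd, TIOCOUTQ, &wspace);
        ioctl(fd, SIOCATMARK, &atmark);
        printf("readable=%d write-space=%d at-mark=%d\n", pending, wspace, atmark);
}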
 518 
 519 
 520 /* This routine computes a TCP checksum. */
 521 unsigned short
 522 tcp_check(struct tcphdr *th, int len,
 523           unsigned long saddr, unsigned long daddr)
 524 {     
 525   unsigned long sum;
 526    
 527   if (saddr == 0) saddr = my_addr();
 528   print_th(th);
 529   __asm__("\t addl %%ecx,%%ebx\n"
 530           "\t adcl %%edx,%%ebx\n"
 531           "\t adcl $0, %%ebx\n"
 532           : "=b"(sum)
 533           : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 534           : "cx","bx","dx" );
 535    
 536   if (len > 3) {
 537         __asm__("\tclc\n"
 538                 "1:\n"
 539                 "\t lodsl\n"
 540                 "\t adcl %%eax, %%ebx\n"
 541                 "\t loop 1b\n"
 542                 "\t adcl $0, %%ebx\n"
 543                 : "=b"(sum) , "=S"(th)
 544                 : "0"(sum), "c"(len/4) ,"1"(th)
 545                 : "ax", "cx", "bx", "si" );
 546   }
 547    
 548   /* Convert from 32 bits to 16 bits. */
 549   __asm__("\t movl %%ebx, %%ecx\n"
 550           "\t shrl $16,%%ecx\n"
 551           "\t addw %%cx, %%bx\n"
 552           "\t adcw $0, %%bx\n"
 553           : "=b"(sum)
 554           : "0"(sum)
 555           : "bx", "cx");
 556    
 557   /* Check for an extra word. */
 558   if ((len & 2) != 0) {
 559         __asm__("\t lodsw\n"
 560                 "\t addw %%ax,%%bx\n"
 561                 "\t adcw $0, %%bx\n"
 562                 : "=b"(sum), "=S"(th)
 563                 : "0"(sum) ,"1"(th)
 564                 : "si", "ax", "bx");
 565   }
 566    
 567   /* Now check for the extra byte. */
 568   if ((len & 1) != 0) {
 569         __asm__("\t lodsb\n"
 570                 "\t movb $0,%%ah\n"
 571                 "\t addw %%ax,%%bx\n"
 572                 "\t adcw $0, %%bx\n"
 573                 : "=b"(sum)
 574                 : "0"(sum) ,"S"(th)
 575                 : "si", "ax", "bx");
 576   }
 577    
 578   /* We only want the bottom 16 bits, but we never cleared the top 16. */
 579   return((~sum) & 0xffff);
 580 }
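
/*
 * Illustrative sketch (an assumption, not part of this file): a portable C
 * version of the 16-bit one's complement checksum that the inline assembly
 * above computes, covering the pseudo header (source address, destination
 * address, protocol, TCP length) followed by the TCP header and data.
 * Addresses are passed in network byte order; htons() the result to store it.
 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <netinet/in.h>

static uint16_t tcp_check_sketch(const uint8_t *seg, size_t len,
                                 uint32_t saddr, uint32_t daddr)
{
        uint8_t pseudo[12];
        uint32_t sum = 0;
        size_t i;

        memcpy(pseudo, &saddr, 4);              /* pseudo header, network order */
        memcpy(pseudo + 4, &daddr, 4);
        pseudo[8] = 0;
        pseudo[9] = IPPROTO_TCP;
        pseudo[10] = (len >> 8) & 0xff;
        pseudo[11] = len & 0xff;

        for (i = 0; i < sizeof(pseudo); i += 2) /* sum pseudo header words */
                sum += (pseudo[i] << 8) | pseudo[i + 1];
        for (i = 0; i + 1 < len; i += 2)        /* sum TCP header and data */
                sum += (seg[i] << 8) | seg[i + 1];
        if (len & 1)                            /* odd trailing byte, zero padded */
                sum += seg[len - 1] << 8;

        while (sum >> 16)                       /* fold carries back in */
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}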
 581 
 582 
 583 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
 584                 unsigned long daddr, int len, struct sock *sk)
 585 {
 586         th->check = 0;
 587         th->check = tcp_check(th, len, saddr, daddr);
 588         return;
 589 }
 590 
 591 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
 592 {
 593         int size;
 594 
 595         /* length of packet (not counting length of pre-tcp headers) */
 596         size = skb->len - ((unsigned char *) skb->h.th - skb->data);
 597 
 598         /* sanity check it.. */
 599         if (size < sizeof(struct tcphdr) || size > skb->len) {
 600                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 601                         skb, skb->data, skb->h.th, skb->len);
 602                 kfree_skb(skb, FREE_WRITE);
 603                 return;
 604         }
 605 
 606         /* If we have queued a header size packet.. */
 607         if (size == sizeof(struct tcphdr)) {
  608                 /* If it's got a syn or fin it's notionally included in the size.. */
 609                 if(!skb->h.th->syn && !skb->h.th->fin) {
 610                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 611                         kfree_skb(skb,FREE_WRITE);
 612                         return;
 613                 }
 614         }
 615   
 616         /* We need to complete and send the packet. */
 617         tcp_send_check(skb->h.th, sk->saddr, sk->daddr, size, sk);
 618 
 619         skb->h.seq = sk->send_seq;
 620         if (after(sk->send_seq , sk->window_seq) ||
 621             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 622              sk->packets_out >= sk->cong_window) {
 623                 DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
 624                                         sk->cong_window, sk->packets_out));
 625                 DPRINTF((DBG_TCP, "sk->send_seq = %d, sk->window_seq = %d\n",
 626                                         sk->send_seq, sk->window_seq));
 627                 skb->next = NULL;
 628                 skb->magic = TCP_WRITE_QUEUE_MAGIC;
 629                 if (sk->wback == NULL) {
 630                         sk->wfront = skb;
 631                 } else {
 632                         sk->wback->next = skb;
 633                 }
 634                 sk->wback = skb;
 635                 if (before(sk->window_seq, sk->wfront->h.seq) &&
 636                     sk->send_head == NULL &&
 637                     sk->ack_backlog == 0)
 638                   reset_timer(sk, TIME_PROBE0, sk->rto);
 639         } else {
 640                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 641         }
 642 }
 643 
 644 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
 645 {
 646         struct sk_buff * skb;
 647         unsigned long flags;
 648 
 649         save_flags(flags);
 650         cli();
 651         skb = sk->partial;
 652         if (skb) {
 653                 sk->partial = NULL;
 654                 del_timer(&sk->partial_timer);
 655         }
 656         restore_flags(flags);
 657         return skb;
 658 }
 659 
 660 static void tcp_send_partial(struct sock *sk)
 661 {
 662         struct sk_buff *skb;
 663 
 664         if (sk == NULL)
 665                 return;
 666         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 667                 tcp_send_skb(sk, skb);
 668 }
 669 
 670 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
 671 {
 672         struct sk_buff * tmp;
 673         unsigned long flags;
 674 
 675         save_flags(flags);
 676         cli();
 677         tmp = sk->partial;
 678         if (tmp)
 679                 del_timer(&sk->partial_timer);
 680         sk->partial = skb;
 681         sk->partial_timer.expires = HZ;
 682         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 683         sk->partial_timer.data = (unsigned long) sk;
 684         add_timer(&sk->partial_timer);
 685         restore_flags(flags);
 686         if (tmp)
 687                 tcp_send_skb(sk, tmp);
 688 }
 689 
 690 
 691 /* This routine sends an ack and also updates the window. */
 692 static void
 693 tcp_send_ack(unsigned long sequence, unsigned long ack,
 694              struct sock *sk,
 695              struct tcphdr *th, unsigned long daddr)
 696 {
 697   struct sk_buff *buff;
 698   struct tcphdr *t1;
 699   struct device *dev = NULL;
 700   int tmp;
 701 
 702   if(sk->zapped)
 703         return;         /* We have been reset, we may not send again */
 704   /*
 705    * We need to grab some memory, and put together an ack,
 706    * and then put it into the queue to be sent.
 707    */
 708   buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 709   if (buff == NULL) {
 710         /* Force it to send an ack. */
 711         sk->ack_backlog++;
 712         if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) {
 713                 reset_timer(sk, TIME_WRITE, 10);
 714         }
 715 if (inet_debug == DBG_SLIP) printk("\rtcp_ack: malloc failed\n");
 716         return;
 717   }
 718 
 719   buff->mem_addr = buff;
 720   buff->mem_len = MAX_ACK_SIZE;
 721   buff->len = sizeof(struct tcphdr);
 722   buff->sk = sk;
 723   t1 =(struct tcphdr *) buff->data;
 724 
 725   /* Put in the IP header and routing stuff. */
 726   tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 727                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 728   if (tmp < 0) {
 729         buff->free=1;
 730         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 731 if (inet_debug == DBG_SLIP) printk("\rtcp_ack: build_header failed\n");
 732         return;
 733   }
 734   buff->len += tmp;
 735   t1 =(struct tcphdr *)((char *)t1 +tmp);
 736 
 737   /* FIXME: */
 738   memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 739 
 740   /* swap the send and the receive. */
 741   t1->dest = th->source;
 742   t1->source = th->dest;
 743   t1->seq = ntohl(sequence);
 744   t1->ack = 1;
 745   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
 746   t1->window = ntohs(sk->window);
 747   t1->res1 = 0;
 748   t1->res2 = 0;
 749   t1->rst = 0;
 750   t1->urg = 0;
 751   t1->syn = 0;
 752   t1->psh = 0;
 753   t1->fin = 0;
 754   if (ack == sk->acked_seq) {
 755         sk->ack_backlog = 0;
 756         sk->bytes_rcv = 0;
 757         sk->ack_timed = 0;
 758         if (sk->send_head == NULL && sk->wfront == NULL && sk->timeout == TIME_WRITE) 
 759         {
 760                 if(sk->keepopen)
 761                         reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 762                 else
 763                         delete_timer(sk);
 764         }
 765   }
 766   t1->ack_seq = ntohl(ack);
 767   t1->doff = sizeof(*t1)/4;
 768   tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 769   if (sk->debug)
 770          printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 771   sk->prot->queue_xmit(sk, dev, buff, 1);
 772 }
 773 
 774 
 775 /* This routine builds a generic TCP header. */
 776 static int
 777 tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
 778 {
 779 
 780   /* FIXME: want to get rid of this. */
 781   memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 782   th->seq = htonl(sk->send_seq);
 783   th->psh =(push == 0) ? 1 : 0;
 784   th->doff = sizeof(*th)/4;
 785   th->ack = 1;
 786   th->fin = 0;
 787   sk->ack_backlog = 0;
 788   sk->bytes_rcv = 0;
 789   sk->ack_timed = 0;
 790   th->ack_seq = htonl(sk->acked_seq);
 791   sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 792   th->window = htons(sk->window);
 793 
 794   return(sizeof(*th));
 795 }
 796 
 797 /*
 798  * This routine copies from a user buffer into a socket,
 799  * and starts the transmit system.
 800  */
 801 static int
 802 tcp_write(struct sock *sk, unsigned char *from,
 803           int len, int nonblock, unsigned flags)
 804 {
 805   int copied = 0;
 806   int copy;
 807   int tmp;
 808   struct sk_buff *skb;
 809   struct sk_buff *send_tmp;
 810   unsigned char *buff;
 811   struct proto *prot;
 812   struct device *dev = NULL;
 813 
 814   DPRINTF((DBG_TCP, "tcp_write(sk=%X, from=%X, len=%d, nonblock=%d, flags=%X)\n",
 815                                         sk, from, len, nonblock, flags));
 816 
 817   sk->inuse=1;
 818   prot = sk->prot;
 819   while(len > 0) {
 820         if (sk->err) {                  /* Stop on an error */
 821                 release_sock(sk);
 822                 if (copied) return(copied);
 823                 tmp = -sk->err;
 824                 sk->err = 0;
 825                 return(tmp);
 826         }
 827 
 828         /* First thing we do is make sure that we are established. */    
 829         if (sk->shutdown & SEND_SHUTDOWN) {
 830                 release_sock(sk);
 831                 sk->err = EPIPE;
 832                 if (copied) return(copied);
 833                 sk->err = 0;
 834                 return(-EPIPE);
 835         }
 836 
 837 
 838         /* Wait for a connection to finish. */
 839         
 840         while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) {
 841                 if (sk->err) {
 842                         release_sock(sk);
 843                         if (copied) return(copied);
 844                         tmp = -sk->err;
 845                         sk->err = 0;
 846                         return(tmp);
 847                 }
 848 
 849                 if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) {
 850                         release_sock(sk);
 851                         DPRINTF((DBG_TCP, "tcp_write: return 1\n"));
 852                         if (copied) return(copied);
 853 
 854                         if (sk->err) {
 855                                 tmp = -sk->err;
 856                                 sk->err = 0;
 857                                 return(tmp);
 858                         }
 859 
 860                         if (sk->keepopen) {
 861                                 send_sig(SIGPIPE, current, 0);
 862                         }
 863                         return(-EPIPE);
 864                 }
 865 
 866                 if (nonblock || copied) {
 867                         release_sock(sk);
 868                         DPRINTF((DBG_TCP, "tcp_write: return 2\n"));
 869                         if (copied) return(copied);
 870                         return(-EAGAIN);
 871                 }
 872 
 873                 release_sock(sk);
 874                 cli();
 875                 if (sk->state != TCP_ESTABLISHED &&
 876                     sk->state != TCP_CLOSE_WAIT && sk->err == 0) {
 877                         interruptible_sleep_on(sk->sleep);
 878                         if (current->signal & ~current->blocked) {
 879                                 sti();
 880                                 DPRINTF((DBG_TCP, "tcp_write: return 3\n"));
 881                                 if (copied) return(copied);
 882                                 return(-ERESTARTSYS);
 883                         }
 884                 }
 885                 sk->inuse = 1;
 886                 sti();
 887         }
 888 
 889 /*
  890  * The following code can result in copy <= 0 if sk->mss is ever
 891  * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 892  * sk->mtu is constant once SYN processing is finished.  I.e. we
 893  * had better not get here until we've seen his SYN and at least one
 894  * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 895  * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 896  * non-decreasing.  Note that any ioctl to set user_mss must be done
 897  * before the exchange of SYN's.  If the initial ack from the other
 898  * end has a window of 0, max_window and thus mss will both be 0.
 899  */
 900 
 901         /* Now we need to check if we have a half built packet. */
 902         if ((skb = tcp_dequeue_partial(sk)) != NULL) {
 903                 int hdrlen;
 904 
 905                  /* IP header + TCP header */
 906                 hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 907                          + sizeof(struct tcphdr);
 908 
 909                 /* Add more stuff to the end of skb->len */
 910                 if (!(flags & MSG_OOB)) {
 911                         copy = min(sk->mss - (skb->len - hdrlen), len);
 912                         /* FIXME: this is really a bug. */
 913                         if (copy <= 0) {
 914                           printk("TCP: **bug**: \"copy\" <= 0!!\n");
 915                           copy = 0;
 916                         }
 917           
 918                         memcpy_fromfs(skb->data + skb->len, from, copy);
 919                         skb->len += copy;
 920                         from += copy;
 921                         copied += copy;
 922                         len -= copy;
 923                         sk->send_seq += copy;
 924                       }
 925                 if ((skb->len - hdrlen) >= sk->mss ||
 926                     (flags & MSG_OOB) ||
 927                     !sk->packets_out)
 928                         tcp_send_skb(sk, skb);
 929                 else
 930                         tcp_enqueue_partial(skb, sk);
 931                 continue;
 932         }
 933 
 934         /*
 935          * We also need to worry about the window.
 936          * If window < 1/2 the maximum window we've seen from this
 937          *   host, don't use it.  This is sender side
 938          *   silly window prevention, as specified in RFC1122.
  939          *   (Note that this is different from earlier versions of
  940          *   SWS prevention, e.g. RFC813.)  What we actually do is 
  941          *   use the whole MSS.  Since this results in the right
 942          *   edge of the packet being outside the window, it will
 943          *   be queued for later rather than sent.
 944          */
 945 
 946         copy = diff(sk->window_seq, sk->send_seq);
 947         /* what if max_window == 1?  In that case max_window >> 1 is 0.
 948          * however in that case copy == max_window, so it's OK to use 
 949          * the window */
 950         if (copy < (sk->max_window >> 1))
 951           copy = sk->mss;
 952         copy = min(copy, sk->mss);
 953         copy = min(copy, len);
 954 
 955   /* We should really check the window here also. */
 956         send_tmp = NULL;
 957         if (copy < sk->mss && !(flags & MSG_OOB)) {
  958         /* We will release the socket in case we sleep here. */
 959           release_sock(sk);
 960           /* NB: following must be mtu, because mss can be increased.
 961            * mss is always <= mtu */
 962           skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
 963           sk->inuse = 1;
 964           send_tmp = skb;
 965         } else {
  966                 /* We will release the socket in case we sleep here. */
 967           release_sock(sk);
 968           skb = prot->wmalloc(sk, copy + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
 969           sk->inuse = 1;
 970         }
 971 
 972         /* If we didn't get any memory, we need to sleep. */
 973         if (skb == NULL) {
 974                 if (nonblock /* || copied */) {
 975                         release_sock(sk);
 976                         DPRINTF((DBG_TCP, "tcp_write: return 4\n"));
 977                         if (copied) return(copied);
 978                         return(-EAGAIN);
 979                 }
 980 
 981                 /* FIXME: here is another race condition. */
 982                 tmp = sk->wmem_alloc;
 983                 release_sock(sk);
 984                 cli();
 985                 /* Again we will try to avoid it. */
 986                 if (tmp <= sk->wmem_alloc &&
 987                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
 988                                 && sk->err == 0) {
 989                         interruptible_sleep_on(sk->sleep);
 990                         if (current->signal & ~current->blocked) {
 991                                 sti();
 992                                 DPRINTF((DBG_TCP, "tcp_write: return 5\n"));
 993                                 if (copied) return(copied);
 994                                 return(-ERESTARTSYS);
 995                         }
 996                 }
 997                 sk->inuse = 1;
 998                 sti();
 999                 continue;
1000         }
1001 
1002         skb->len = 0;
1003         skb->sk = sk;
1004         skb->free = 0;
1005 
1006         buff = skb->data;
1007 
1008         /*
1009          * FIXME: we need to optimize this.
1010          * Perhaps some hints here would be good.
1011          */
1012         tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1013                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1014         if (tmp < 0 ) {
1015                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
1016                 release_sock(sk);
1017                 DPRINTF((DBG_TCP, "tcp_write: return 6\n"));
1018                 if (copied) return(copied);
1019                 return(tmp);
1020         }
1021         skb->len += tmp;
1022         skb->dev = dev;
1023         buff += tmp;
1024         skb->h.th =(struct tcphdr *) buff;
1025         tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1026         if (tmp < 0) {
1027                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
1028                 release_sock(sk);
1029                 DPRINTF((DBG_TCP, "tcp_write: return 7\n"));
1030                 if (copied) return(copied);
1031                 return(tmp);
1032         }
1033 
1034         if (flags & MSG_OOB) {
1035                 ((struct tcphdr *)buff)->urg = 1;
1036                 ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1037         }
1038         skb->len += tmp;
1039         memcpy_fromfs(buff+tmp, from, copy);
1040 
1041         from += copy;
1042         copied += copy;
1043         len -= copy;
1044         skb->len += copy;
1045         skb->free = 0;
1046         sk->send_seq += copy;
1047 
1048         if (send_tmp != NULL && sk->packets_out) {
1049                 tcp_enqueue_partial(send_tmp, sk);
1050                 continue;
1051         }
1052         tcp_send_skb(sk, skb);
1053   }
1054   sk->err = 0;
1055 
1056 /*
 1057  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
 1058  *      interactive fast network servers. It's meant to be on and
 1059  *      it really improves the throughput though not the echo time
 1060  *      on my slow slip link - Alan. (A user-space sketch follows this function.)
1061  */
1062 
1063   /* Avoid possible race on send_tmp - c/o Johannes Stille */
1064   if(sk->partial && 
1065      ((!sk->packets_out) 
1066      /* If not nagling we can send on the before case too.. */
1067       || (sk->nonagle && before(sk->send_seq , sk->window_seq))
1068       ))
1069         tcp_send_partial(sk);
1070   /* -- */
1071   release_sock(sk);
1072   DPRINTF((DBG_TCP, "tcp_write: return 8\n"));
1073   return(copied);
1074 }
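
/*
 * Illustrative user-space sketch (an assumption, not part of this file):
 * turning off the Nagle behaviour referred to above via the TCP_NODELAY
 * option handled by tcp_setsockopt() (SOL_TCP and IPPROTO_TCP are the same
 * level on Linux).
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static int disable_nagle(int fd)
{
        int one = 1;

        return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
}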
1075 
1076 
1077 static int
1078 tcp_sendto(struct sock *sk, unsigned char *from,
1079            int len, int nonblock, unsigned flags,
1080            struct sockaddr_in *addr, int addr_len)
1081 {
1082   struct sockaddr_in sin;
1083 
1084   if (addr_len < sizeof(sin)) return(-EINVAL);
1085   memcpy_fromfs(&sin, addr, sizeof(sin));
1086   if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL);
1087   if (sin.sin_port != sk->dummy_th.dest) return(-EINVAL);
1088   if (sin.sin_addr.s_addr != sk->daddr) return(-EINVAL);
1089   return(tcp_write(sk, from, len, nonblock, flags));
1090 }
1091 
1092 
1093 static void
1094 tcp_read_wakeup(struct sock *sk)
1095 {
1096   int tmp;
1097   struct device *dev = NULL;
1098   struct tcphdr *t1;
1099   struct sk_buff *buff;
1100 
1101   DPRINTF((DBG_TCP, "in tcp read wakeup\n"));
1102   if (!sk->ack_backlog) return;
1103 
1104   /*
1105    * FIXME: we need to put code here to prevent this routine from
1106    * being called.  Being called once in a while is ok, so only check
1107    * if this is the second time in a row.
1108    */
1109 
1110   /*
1111    * We need to grab some memory, and put together an ack,
1112    * and then put it into the queue to be sent.
1113    */
1114   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1115   if (buff == NULL) {
1116         /* Try again real soon. */
1117         reset_timer(sk, TIME_WRITE, 10);
1118         return;
1119   }
1120 
1121   buff->mem_addr = buff;
1122   buff->mem_len = MAX_ACK_SIZE;
1123   buff->len = sizeof(struct tcphdr);
1124   buff->sk = sk;
1125 
1126   /* Put in the IP header and routing stuff. */
1127   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1128                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1129   if (tmp < 0) {
1130         buff->free=1;
1131         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1132         return;
1133   }
1134 
1135   buff->len += tmp;
1136   t1 =(struct tcphdr *)(buff->data +tmp);
1137 
1138   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1139   t1->seq = ntohl(sk->send_seq);
1140   t1->ack = 1;
1141   t1->res1 = 0;
1142   t1->res2 = 0;
1143   t1->rst = 0;
1144   t1->urg = 0;
1145   t1->syn = 0;
1146   t1->psh = 0;
1147   sk->ack_backlog = 0;
1148   sk->bytes_rcv = 0;
1149   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1150   t1->window = ntohs(sk->window);
1151   t1->ack_seq = ntohl(sk->acked_seq);
1152   t1->doff = sizeof(*t1)/4;
1153   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1154   sk->prot->queue_xmit(sk, dev, buff, 1);
1155 }
1156 
1157 
1158 /*
1159  * FIXME:
1160  * This routine frees used buffers.
1161  * It should consider sending an ACK to let the
1162  * other end know we now have a bigger window.
1163  */
1164 static void
1165 cleanup_rbuf(struct sock *sk)
1166 {
1167   unsigned long flags;
1168   int left;
1169   struct sk_buff *skb;
1170 
1171   if(sk->debug)
1172         printk("cleaning rbuf for sk=%p\n", sk);
1173   
1174   save_flags(flags);
1175   cli();
1176   
1177   left = sk->prot->rspace(sk);
1178  
1179   /*
1180    * We have to loop through all the buffer headers,
1181    * and try to free up all the space we can.
1182    */
1183   while((skb=skb_peek(&sk->rqueue)) != NULL ) 
1184   {
1185         if (!skb->used) 
1186                 break;
1187         skb_unlink(skb);
1188         skb->sk = sk;
1189         kfree_skb(skb, FREE_READ);
1190   }
1191 
1192   restore_flags(flags);
1193 
1194   /*
1195    * FIXME:
1196    * At this point we should send an ack if the difference
1197    * in the window, and the amount of space is bigger than
1198    * TCP_WINDOW_DIFF.
1199    */
1200   DPRINTF((DBG_TCP, "sk->window left = %d, sk->prot->rspace(sk)=%d\n",
1201                         sk->window - sk->bytes_rcv, sk->prot->rspace(sk)));
1202 
1203   if(sk->debug)
1204         printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk),
1205                                             left);
1206   if (sk->prot->rspace(sk) != left) 
1207   {
1208         /*
1209          * This area has caused the most trouble.  The current strategy
1210          * is to simply do nothing if the other end has room to send at
1211          * least 3 full packets, because the ack from those will auto-
1212          * matically update the window.  If the other end doesn't think
 1213          * we have much space left, but we have room for at least 1 more
 1214          * complete packet than it thinks we do, we will send an ack
 1215          * immediately.  Otherwise we will wait up to .5 seconds in case
1216          * the user reads some more.
1217          */
1218         sk->ack_backlog++;
1219 /*
1220  * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1221  * if the other end is offering a window smaller than the agreed on MSS
1222  * (called sk->mtu here).  In theory there's no connection between send
1223  * and receive, and so no reason to think that they're going to send
1224  * small packets.  For the moment I'm using the hack of reducing the mss
1225  * only on the send side, so I'm putting mtu here.
1226  */
1227         if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) {
1228                 /* Send an ack right now. */
1229                 tcp_read_wakeup(sk);
1230         } else {
1231                 /* Force it to send an ack soon. */
1232                 int was_active = del_timer(&sk->timer);
1233                 if (!was_active || TCP_ACK_TIME < sk->timer.expires) {
1234                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1235                 } else
1236                         add_timer(&sk->timer);
1237         }
1238   }
1239 } 
1240 
1241 
1242 /* Handle reading urgent data. */
1243 static int
1244 tcp_read_urg(struct sock * sk, int nonblock,
1245              unsigned char *to, int len, unsigned flags)
1246 {
1247         struct wait_queue wait = { current, NULL };
1248 
1249         while (len > 0) {
1250                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1251                         return -EINVAL;
1252                 if (sk->urg_data & URG_VALID) {
1253                         char c = sk->urg_data;
1254                         if (!(flags & MSG_PEEK))
1255                                 sk->urg_data = URG_READ;
1256                         put_fs_byte(c, to);
1257                         return 1;
1258                 }
1259 
1260                 if (sk->err) {
1261                         int tmp = -sk->err;
1262                         sk->err = 0;
1263                         return tmp;
1264                 }
1265 
1266                 if (sk->state == TCP_CLOSE || sk->done) {
1267                         if (!sk->done) {
1268                                 sk->done = 1;
1269                                 return 0;
1270                         }
1271                         return -ENOTCONN;
1272                 }
1273 
1274                 if (sk->shutdown & RCV_SHUTDOWN) {
1275                         sk->done = 1;
1276                         return 0;
1277                 }
1278 
1279                 if (nonblock)
1280                         return -EAGAIN;
1281 
1282                 if (current->signal & ~current->blocked)
1283                         return -ERESTARTSYS;
1284 
1285                 current->state = TASK_INTERRUPTIBLE;
1286                 add_wait_queue(sk->sleep, &wait);
1287                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1288                     !(sk->shutdown & RCV_SHUTDOWN))
1289                         schedule();
1290                 remove_wait_queue(sk->sleep, &wait);
1291                 current->state = TASK_RUNNING;
1292         }
1293         return 0;
1294 }
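
/*
 * Illustrative user-space sketch (an assumption, not part of this file):
 * fetching the single byte of urgent data that tcp_read_urg() serves, by
 * passing MSG_OOB to recv() on a socket that is not using SO_OOBINLINE.
 */
#include <sys/socket.h>

static int read_urgent_byte(int fd, char *out)
{
        return recv(fd, out, 1, MSG_OOB);       /* 1 on success, -1 otherwise */
}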
1295 
1296 
1297 /* This routine copies from a sock struct into the user buffer. */
1298 static int tcp_read(struct sock *sk, unsigned char *to,
1299         int len, int nonblock, unsigned flags)
1300 {
1301         struct wait_queue wait = { current, NULL };
1302         int copied = 0;
1303         unsigned long peek_seq;
1304         unsigned long *seq;
1305         unsigned long used;
1306         int err;
1307 
1308         if (len == 0)
1309                 return 0;
1310 
1311         if (len < 0)
1312                 return -EINVAL;
1313 
1314         err = verify_area(VERIFY_WRITE, to, len);
1315         if (err)
1316                 return err;
1317 
1318         /* This error should be checked. */
1319         if (sk->state == TCP_LISTEN)
1320                 return -ENOTCONN;
1321 
1322         /* Urgent data needs to be handled specially. */
1323         if (flags & MSG_OOB)
1324                 return tcp_read_urg(sk, nonblock, to, len, flags);
1325 
1326         peek_seq = sk->copied_seq;
1327         seq = &sk->copied_seq;
1328         if (flags & MSG_PEEK)
1329                 seq = &peek_seq;
1330 
1331         add_wait_queue(sk->sleep, &wait);
1332         sk->inuse = 1;
1333         while (len > 0) {
1334                 struct sk_buff * skb;
1335                 unsigned long offset;
1336         
1337                 /*
1338                  * are we at urgent data? Stop if we have read anything.
1339                  */
1340                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1341                         break;
1342 
1343                 current->state = TASK_INTERRUPTIBLE;
1344 
1345                 skb = sk->rqueue;
1346                 do {
1347                         if (!skb)
1348                                 break;
1349                         if (before(1+*seq, skb->h.th->seq))
1350                                 break;
1351                         offset = 1 + *seq - skb->h.th->seq;
1352                         if (skb->h.th->syn)
1353                                 offset--;
1354                         if (offset < skb->len)
1355                                 goto found_ok_skb;
1356                         if (!(flags & MSG_PEEK))
1357                                 skb->used = 1;
1358                         skb = (struct sk_buff *)skb->next;
1359                 } while (skb != sk->rqueue);
1360 
1361                 if (copied)
1362                         break;
1363 
1364                 if (sk->err) {
1365                         copied = -sk->err;
1366                         sk->err = 0;
1367                         break;
1368                 }
1369 
1370                 if (sk->state == TCP_CLOSE) {
1371                         if (!sk->done) {
1372                                 sk->done = 1;
1373                                 break;
1374                         }
1375                         copied = -ENOTCONN;
1376                         break;
1377                 }
1378 
1379                 if (sk->shutdown & RCV_SHUTDOWN) {
1380                         sk->done = 1;
1381                         break;
1382                 }
1383                         
1384                 if (nonblock) {
1385                         copied = -EAGAIN;
1386                         break;
1387                 }
1388 
1389                 cleanup_rbuf(sk);
1390                 release_sock(sk);
1391                 schedule();
1392                 sk->inuse = 1;
1393 
1394                 if (current->signal & ~current->blocked) {
1395                         copied = -ERESTARTSYS;
1396                         break;
1397                 }
1398                 continue;
1399 
1400         found_ok_skb:
1401                 /* Ok, so how much can we use? */
1402                 used = skb->len - offset;
1403                 if (len < used)
1404                         used = len;
1405                 /* do we have urgent data here? */
1406                 if (sk->urg_data) {
1407                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1408                         if (urg_offset < used) {
1409                                 if (!urg_offset) {
1410                                         if (!sk->urginline) {
1411                                                 ++*seq;
1412                                                 offset++;
1413                                                 used--;
1414                                         }
1415                                 } else
1416                                         used = urg_offset;
1417                         }
1418                 }
1419                 /* Copy it */
1420                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1421                         skb->h.th->doff*4 + offset, used);
1422                 copied += used;
1423                 len -= used;
1424                 to += used;
1425                 *seq += used;
1426                 if (after(sk->copied_seq+1,sk->urg_seq))
1427                         sk->urg_data = 0;
1428                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1429                         skb->used = 1;
1430         }
1431         remove_wait_queue(sk->sleep, &wait);
1432         current->state = TASK_RUNNING;
1433 
1434         /* Clean up data we have read: This will do ACK frames */
1435         cleanup_rbuf(sk);
1436         release_sock(sk);
1437         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1438         return copied;
1439 }
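/*
 * A worked example of the read-offset arithmetic above (numbers made up):
 * with *seq (copied_seq) = 1999 and an skb whose th->seq is 1990 carrying
 * 100 data bytes and no SYN,
 *     offset = 1 + 1999 - 1990 = 10
 * so ten bytes of that buffer were already consumed and up to 90 remain
 * readable before the loop moves on to the next skb.
 */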
1440 
1441  
1442 /*
1443  * Send a FIN without closing the connection.
1444  * Not called at interrupt time.
1445  */
1446 void
1447 tcp_shutdown(struct sock *sk, int how)
1448 {
1449   struct sk_buff *buff;
1450   struct tcphdr *t1, *th;
1451   struct proto *prot;
1452   int tmp;
1453   struct device *dev = NULL;
1454 
1455   /*
1456    * We need to grab some memory, and put together a FIN,
1457    * and then put it into the queue to be sent.
1458    * FIXME:
1459    *    Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1460    *    Most of this is guesswork, so maybe it will work...
1461    */
1462   /* If we've already sent a FIN, return. */
1463   if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) return;
1464   if (!(how & SEND_SHUTDOWN)) return;
1465   sk->inuse = 1;
1466 
1467   /* Clear out any half completed packets. */
1468   if (sk->partial)
1469         tcp_send_partial(sk);
1470 
1471   prot =(struct proto *)sk->prot;
1472   th =(struct tcphdr *)&sk->dummy_th;
1473   release_sock(sk); /* in case the malloc sleeps. */
1474   buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1475   if (buff == NULL) return;
1476   sk->inuse = 1;
1477 
1478   DPRINTF((DBG_TCP, "tcp_shutdown_send buff = %X\n", buff));
1479   buff->mem_addr = buff;
1480   buff->mem_len = MAX_RESET_SIZE;
1481   buff->sk = sk;
1482   buff->len = sizeof(*t1);
1483   t1 =(struct tcphdr *) buff->data;
1484 
1485   /* Put in the IP header and routing stuff. */
1486   tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1487                            IPPROTO_TCP, sk->opt,
1488                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1489   if (tmp < 0) {
1490         buff->free=1;
1491         prot->wfree(sk,buff->mem_addr, buff->mem_len);
1492         release_sock(sk);
1493         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
1494         return;
1495   }
1496 
1497   t1 =(struct tcphdr *)((char *)t1 +tmp);
1498   buff->len += tmp;
1499   buff->dev = dev;
1500   memcpy(t1, th, sizeof(*t1));
1501   t1->seq = ntohl(sk->send_seq);
1502   sk->send_seq++;
1503   buff->h.seq = sk->send_seq;
1504   t1->ack = 1;
1505   t1->ack_seq = ntohl(sk->acked_seq);
1506   t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1507   t1->fin = 1;
1508   t1->rst = 0;
1509   t1->doff = sizeof(*t1)/4;
1510   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1511 
1512   /*
1513    * Can't just queue this up.
1514    * It should go at the end of the write queue.
1515    */
1516   if (sk->wback != NULL) {
1517         buff->free=0;   
1518         buff->next = NULL;
1519         sk->wback->next = buff;
1520         sk->wback = buff;
1521         buff->magic = TCP_WRITE_QUEUE_MAGIC;
1522   } else {
1523         sk->prot->queue_xmit(sk, dev, buff, 0);
1524   }
1525 
1526   if (sk->state == TCP_ESTABLISHED) sk->state = TCP_FIN_WAIT1;
1527     else sk->state = TCP_FIN_WAIT2;
1528 
1529   release_sock(sk);
1530 }
1531 
1532 
1533 static int
1534 tcp_recvfrom(struct sock *sk, unsigned char *to,
1535              int to_len, int nonblock, unsigned flags,
1536              struct sockaddr_in *addr, int *addr_len)
1537 {
1538   struct sockaddr_in sin;
1539   int len;
1540   int err;
1541   int result;
1542   
1543   /* Have to check these first, unlike the old code. If
1544      we check them after, we lose data on an error,
1545      which is wrong */
1546   err = verify_area(VERIFY_WRITE,addr_len,sizeof(long));
1547   if(err)
1548         return err;
1549   len = get_fs_long(addr_len);
1550   if(len > sizeof(sin))
1551         len = sizeof(sin);
1552   err=verify_area(VERIFY_WRITE, addr, len);  
1553   if(err)
1554         return err;
1555         
1556   result=tcp_read(sk, to, to_len, nonblock, flags);
1557 
1558   if (result < 0) return(result);
1559   
1560   sin.sin_family = AF_INET;
1561   sin.sin_port = sk->dummy_th.dest;
1562   sin.sin_addr.s_addr = sk->daddr;
1563 
1564   memcpy_tofs(addr, &sin, len);
1565   put_fs_long(len, addr_len);
1566   return(result);
1567 }
1568 
1569 
1570 /* This routine will send an RST to the other tcp. */
1571 static void
1572 tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
1573           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1574 {
1575   struct sk_buff *buff;
1576   struct tcphdr *t1;
1577   int tmp;
1578 
1579   /*
1580    * We need to grab some memory, and put together an RST,
1581    * and then put it into the queue to be sent.
1582    */
1583   buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1584   if (buff == NULL) 
1585         return;
1586 
1587   DPRINTF((DBG_TCP, "tcp_reset buff = %X\n", buff));
1588   buff->mem_addr = buff;
1589   buff->mem_len = MAX_RESET_SIZE;
1590   buff->len = sizeof(*t1);
1591   buff->sk = NULL;
1592   buff->dev = dev;
1593 
1594   t1 =(struct tcphdr *) buff->data;
1595 
1596   /* Put in the IP header and routing stuff. */
1597   tmp = prot->build_header(buff, saddr, daddr, &dev, IPPROTO_TCP, opt,
1598                            sizeof(struct tcphdr),tos,ttl);
1599   if (tmp < 0) {
1600         buff->free = 1;
1601         prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1602         return;
1603   }
1604   t1 =(struct tcphdr *)((char *)t1 +tmp);
1605   buff->len += tmp;
1606   memcpy(t1, th, sizeof(*t1));
1607 
1608   /* Swap the send and the receive. */
1609   t1->dest = th->source;
1610   t1->source = th->dest;
1611   t1->rst = 1;  
1612   t1->window = 0;
1613   
1614   if(th->ack)
1615   {
1616         t1->ack=0;
1617         t1->seq=th->ack_seq;
1618         t1->ack_seq=0;
1619   }
1620   else
1621   {
1622         t1->ack=1;
1623         if(!th->syn)
1624                 t1->ack_seq=htonl(th->seq);
1625         else
1626                 t1->ack_seq=htonl(th->seq+1);
1627         t1->seq=0;
1628   }
1629 
1630   t1->syn = 0;
1631   t1->urg = 0;
1632   t1->fin = 0;
1633   t1->psh = 0;
1634   t1->doff = sizeof(*t1)/4;
1635   tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1636   prot->queue_xmit(NULL, dev, buff, 1);
1637 }
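/*
 * In short, the reset built above broadly mirrors the reset-generation
 * rules of RFC 793: if the offending segment carried an ACK, the RST is
 * sent with seq = that ack_seq and no ACK bit of its own; otherwise it is
 * sent with seq = 0, the ACK bit set, and ack_seq = th->seq (plus one when
 * the segment was a SYN).
 */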
1638 
1639 
1640 /*
1641  *      Look for tcp options. Parses everything but only knows about MSS.
1642  *      This routine is always called with the packet containing the SYN.
1643  *      However it may also be called with the ack to the SYN.  So you
1644  *      can't assume this is always the SYN.  It's always called after
1645  *      we have set up sk->mtu to our own MTU.
1646  */
1647  
1648 static void
1649 tcp_options(struct sock *sk, struct tcphdr *th)
1650 {
1651   unsigned char *ptr;
1652   int length=(th->doff*4)-sizeof(struct tcphdr);
1653   int mss_seen = 0;
1654     
1655   ptr = (unsigned char *)(th + 1);
1656   
1657   while(length>0)
1658   {
1659         int opcode=*ptr++;
1660         int opsize=*ptr++;
1661         switch(opcode)
1662         {
1663                 case TCPOPT_EOL:
1664                         return;
1665                 case TCPOPT_NOP:
1666                         length-=2;
1667                         continue;
1668                 
1669                 default:
1670                         if(opsize<=2)   /* Avoid silly options looping forever */
1671                                 return;
1672                         switch(opcode)
1673                         {
1674                                 case TCPOPT_MSS:
1675                                         if(opsize==4 && th->syn)
1676                                         {
1677                                                 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1678                                                 mss_seen = 1;
1679                                         }
1680                                         break;
1681                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1682                         }
1683                         ptr+=opsize-2;
1684                         length-=opsize;
1685         }
1686   }
1687   if (th->syn) {
1688     if (! mss_seen)
1689       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1690   }
1691   sk->mss = min(sk->max_window, sk->mtu);
1692 }
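/*
 * For reference, the only option acted on here is MSS, carried on a SYN as
 * four bytes: kind 2, length 4, and a 16-bit value in network byte order.
 * An MSS of 1460 would appear on the wire as 02 04 05 b4, which is also
 * the layout tcp_conn_request() writes into its SYN/ACK further down.
 */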
1693 
1694 static inline unsigned long default_mask(unsigned long dst)
1695 {
1696         dst = ntohl(dst);
1697         if (IN_CLASSA(dst))
1698                 return htonl(IN_CLASSA_NET);
1699         if (IN_CLASSB(dst))
1700                 return htonl(IN_CLASSB_NET);
1701         return htonl(IN_CLASSC_NET);
1702 }
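/*
 * Example: a class A destination such as 10.1.2.3 maps to 255.0.0.0, a
 * class B destination such as 172.16.9.1 to 255.255.0.0, and everything
 * else falls through to 255.255.255.0.
 */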
1703 
1704 /*
1705  * This routine handles a connection request.
1706  * It should make sure we haven't already responded.
1707  * Because of the way BSD works, we have to send a syn/ack now.
1708  * This also means it will be harder to close a socket which is
1709  * listening.
1710  */
1711 static void
1712 tcp_conn_request(struct sock *sk, struct sk_buff *skb,
1713                  unsigned long daddr, unsigned long saddr,
1714                  struct options *opt, struct device *dev)
1715 {
1716   struct sk_buff *buff;
1717   struct tcphdr *t1;
1718   unsigned char *ptr;
1719   struct sock *newsk;
1720   struct tcphdr *th;
1721   int tmp;
1722 
1723   DPRINTF((DBG_TCP, "tcp_conn_request(sk = %X, skb = %X, daddr = %X, saddr = %X, \n"
1724           "                  opt = %X, dev = %X)\n",
1725           sk, skb, daddr, saddr, opt, dev));
1726   
1727   th = skb->h.th;
1728 
1729   /* If the socket is dead, don't accept the connection. */
1730   if (!sk->dead) {
1731         sk->data_ready(sk,0);
1732   } else {
1733         DPRINTF((DBG_TCP, "tcp_conn_request on dead socket\n"));
1734         tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1735         kfree_skb(skb, FREE_READ);
1736         return;
1737   }
1738 
1739   /*
1740    * Make sure we can accept more.  This will prevent a
1741    * flurry of syns from eating up all our memory.
1742    */
1743   if (sk->ack_backlog >= sk->max_ack_backlog) {
1744         kfree_skb(skb, FREE_READ);
1745         return;
1746   }
1747 
1748   /*
1749    * We need to build a new sock struct.
1750    * It is sort of bad to have a socket without an inode attached
1751    * to it, but the wake_up's will just wake up the listening socket,
1752    * and if the listening socket is destroyed before this is taken
1753    * off of the queue, this will take care of it.
1754    */
1755   newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1756   if (newsk == NULL) {
1757         /* just ignore the syn.  It will get retransmitted. */
1758         kfree_skb(skb, FREE_READ);
1759         return;
1760   }
1761 
1762   DPRINTF((DBG_TCP, "newsk = %X\n", newsk));
1763   memcpy((void *)newsk,(void *)sk, sizeof(*newsk));
1764   newsk->wback = NULL;
1765   newsk->wfront = NULL;
1766   newsk->rqueue = NULL;
1767   newsk->send_head = NULL;
1768   newsk->send_tail = NULL;
1769   newsk->back_log = NULL;
1770   newsk->rtt = TCP_CONNECT_TIME << 3;
1771   newsk->rto = TCP_CONNECT_TIME;
1772   newsk->mdev = 0;
1773   newsk->max_window = 0;
1774   newsk->cong_window = 1;
1775   newsk->cong_count = 0;
1776   newsk->ssthresh = 0;
1777   newsk->backoff = 0;
1778   newsk->blog = 0;
1779   newsk->intr = 0;
1780   newsk->proc = 0;
1781   newsk->done = 0;
1782   newsk->partial = NULL;
1783   newsk->pair = NULL;
1784   newsk->wmem_alloc = 0;
1785   newsk->rmem_alloc = 0;
1786 
1787   newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1788 
1789   newsk->err = 0;
1790   newsk->shutdown = 0;
1791   newsk->ack_backlog = 0;
1792   newsk->acked_seq = skb->h.th->seq+1;
1793   newsk->fin_seq = skb->h.th->seq;
1794   newsk->copied_seq = skb->h.th->seq;
1795   newsk->state = TCP_SYN_RECV;
1796   newsk->timeout = 0;
1797   newsk->send_seq = jiffies * SEQ_TICK - seq_offset;
1798   newsk->window_seq = newsk->send_seq;
1799   newsk->rcv_ack_seq = newsk->send_seq;
1800   newsk->urg_data = 0;
1801   newsk->retransmits = 0;
1802   newsk->destroy = 0;
1803   newsk->timer.data = (unsigned long)newsk;
1804   newsk->timer.function = &net_timer;
1805   newsk->dummy_th.source = skb->h.th->dest;
1806   newsk->dummy_th.dest = skb->h.th->source;
1807 
1808   /* Swap these two, they are from our point of view. */
1809   newsk->daddr = saddr;
1810   newsk->saddr = daddr;
1811 
1812   put_sock(newsk->num,newsk);
1813   newsk->dummy_th.res1 = 0;
1814   newsk->dummy_th.doff = 6;
1815   newsk->dummy_th.fin = 0;
1816   newsk->dummy_th.syn = 0;
1817   newsk->dummy_th.rst = 0;
1818   newsk->dummy_th.psh = 0;
1819   newsk->dummy_th.ack = 0;
1820   newsk->dummy_th.urg = 0;
1821   newsk->dummy_th.res2 = 0;
1822   newsk->acked_seq = skb->h.th->seq + 1;
1823   newsk->copied_seq = skb->h.th->seq;
1824 
1825   /* Grab the ttl and tos values and use them */
1826   newsk->ip_ttl=sk->ip_ttl;
1827   newsk->ip_tos=skb->ip_hdr->tos;
1828 
1829 /* use the default MSS or whatever the user asked for */
1830 /* note use of sk->user_mss, since user has no direct access to newsk */
1831   if (sk->user_mss)
1832     newsk->mtu = sk->user_mss;
1833   else {
1834 #ifdef SUBNETSARELOCAL
1835     if ((saddr ^ daddr) & default_mask(saddr))
1836 #else
1837     if ((saddr ^ daddr) & dev->pa_mask)
1838 #endif
1839       newsk->mtu = 576 - HEADER_SIZE;
1840     else
1841       newsk->mtu = MAX_WINDOW;
1842   }
1843 /* but not bigger than device MTU */
1844   newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
1845 
1846 /* this will min with what arrived in the packet */
1847   tcp_options(newsk,skb->h.th);
1848 
1849   buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
1850   if (buff == NULL) {
1851         sk->err = -ENOMEM;
1852         newsk->dead = 1;
1853         release_sock(newsk);
1854         kfree_skb(skb, FREE_READ);
1855         return;
1856   }
1857   
1858   buff->mem_addr = buff;
1859   buff->mem_len = MAX_SYN_SIZE;
1860   buff->len = sizeof(struct tcphdr)+4;
1861   buff->sk = newsk;
1862   
1863   t1 =(struct tcphdr *) buff->data;
1864 
1865   /* Put in the IP header and routing stuff. */
1866   tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &dev,
1867                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
1868 
1869   /* Something went wrong. */
1870   if (tmp < 0) {
1871         sk->err = tmp;
1872         buff->free=1;
1873         kfree_skb(buff,FREE_WRITE);
1874         newsk->dead = 1;
1875         release_sock(newsk);
1876         skb->sk = sk;
1877         kfree_skb(skb, FREE_READ);
1878         return;
1879   }
1880 
1881   buff->len += tmp;
1882   t1 =(struct tcphdr *)((char *)t1 +tmp);
1883   
1884   memcpy(t1, skb->h.th, sizeof(*t1));
1885   buff->h.seq = newsk->send_seq;
1886 
1887   /* Swap the send and the receive. */
1888   t1->dest = skb->h.th->source;
1889   t1->source = newsk->dummy_th.source;
1890   t1->seq = ntohl(newsk->send_seq++);
1891   t1->ack = 1;
1892   newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
1893   t1->window = ntohs(newsk->window);
1894   t1->res1 = 0;
1895   t1->res2 = 0;
1896   t1->rst = 0;
1897   t1->urg = 0;
1898   t1->psh = 0;
1899   t1->syn = 1;
1900   t1->ack_seq = ntohl(skb->h.th->seq+1);
1901   t1->doff = sizeof(*t1)/4+1;
1902 
1903   ptr =(unsigned char *)(t1+1);
1904   ptr[0] = 2;
1905   ptr[1] = 4;
1906   ptr[2] = ((newsk->mtu) >> 8) & 0xff;
1907   ptr[3] =(newsk->mtu) & 0xff;
1908 
1909   tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
1910   newsk->prot->queue_xmit(newsk, dev, buff, 0);
1911 
1912   reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_CONNECT_TIME);
1913   skb->sk = newsk;
1914 
1915   /* Charge the sock_buff to newsk. */
1916   sk->rmem_alloc -= skb->mem_len;
1917   newsk->rmem_alloc += skb->mem_len;
1918 
1919   skb_queue_tail(&sk->rqueue,skb);
1920   sk->ack_backlog++;
1921   release_sock(newsk);
1922 }
1923 
1924 
1925 static void
1926 tcp_close(struct sock *sk, int timeout)
1927 {
1928   struct sk_buff *buff;
1929   int need_reset = 0;
1930   struct tcphdr *t1, *th;
1931   struct proto *prot;
1932   struct device *dev=NULL;
1933   int tmp;
1934 
1935   /*
1936    * We need to grab some memory, and put together a FIN,
1937    * and then put it into the queue to be sent.
1938    */
1939   DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));
1940   sk->inuse = 1;
1941   sk->keepopen = 1;
1942   sk->shutdown = SHUTDOWN_MASK;
1943 
1944   if (!sk->dead) 
1945         sk->state_change(sk);
1946 
1947   /* We need to flush the recv. buffs. */
1948   if (skb_peek(&sk->rqueue) != NULL) 
1949   {
1950         struct sk_buff *skb;
1951         if(sk->debug)
1952                 printk("Clean rcv queue\n");
1953         while((skb=skb_dequeue(&sk->rqueue))!=NULL)
1954         {
1955                 if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
1956                                 need_reset = 1;
1957                 kfree_skb(skb, FREE_READ);
1958         }
1959         if(sk->debug)
1960                 printk("Cleaned.\n");
1961   }
1962   sk->rqueue = NULL;
1963 
1964   /* Get rid of any half-completed packets. */
1965   if (sk->partial) {
1966         tcp_send_partial(sk);
1967   }
1968 
1969   switch(sk->state) {
1970         case TCP_FIN_WAIT1:
1971         case TCP_FIN_WAIT2:
1972         case TCP_LAST_ACK:
1973                 /* start a timer. */
1974                 /* original code was 4 * sk->rtt.  In converting to the
1975                  * new rtt representation, we can't quite use that.
1976                  * It seems to make the most sense to use the backed-off value.
1977                  */
1978                 reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
1979                 if (timeout) tcp_time_wait(sk);
1980                 release_sock(sk);
1981                 return; /* break causes a double release - messy */
1982         case TCP_TIME_WAIT:
1983                 if (timeout) {
1984                   sk->state = TCP_CLOSE;
1985                 }
1986                 release_sock(sk);
1987                 return;
1988         case TCP_LISTEN:
1989                 sk->state = TCP_CLOSE;
1990                 release_sock(sk);
1991                 return;
1992         case TCP_CLOSE:
1993                 release_sock(sk);
1994                 return;
1995         case TCP_CLOSE_WAIT:
1996         case TCP_ESTABLISHED:
1997         case TCP_SYN_SENT:
1998         case TCP_SYN_RECV:
1999                 prot =(struct proto *)sk->prot;
2000                 th =(struct tcphdr *)&sk->dummy_th;
2001                 buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2002                 if (buff == NULL) {
2003                         /* This will force it to try again later. */
2004                         /* Or it would have if someone released the socket
2005                            first. Anyway it might work now */
2006                         release_sock(sk);
2007                         if (sk->state != TCP_CLOSE_WAIT)
2008                                         sk->state = TCP_ESTABLISHED;
2009                         reset_timer(sk, TIME_CLOSE, 100);
2010                         return;
2011                 }
2012                 buff->mem_addr = buff;
2013                 buff->mem_len = MAX_FIN_SIZE;
2014                 buff->sk = sk;
2015                 buff->free = 1;
2016                 buff->len = sizeof(*t1);
2017                 t1 =(struct tcphdr *) buff->data;
2018 
2019                 /* Put in the IP header and routing stuff. */
2020                 tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2021                                          IPPROTO_TCP, sk->opt,
2022                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2023                 if (tmp < 0) {
2024                         kfree_skb(buff,FREE_WRITE);
2025                         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
2026                         release_sock(sk);
2027                         return;
2028                 }
2029 
2030                 t1 =(struct tcphdr *)((char *)t1 +tmp);
2031                 buff->len += tmp;
2032                 buff->dev = dev;
2033                 memcpy(t1, th, sizeof(*t1));
2034                 t1->seq = ntohl(sk->send_seq);
2035                 sk->send_seq++;
2036                 buff->h.seq = sk->send_seq;
2037                 t1->ack = 1;
2038 
2039                 /* Ack everything immediately from now on. */
2040                 sk->delay_acks = 0;
2041                 t1->ack_seq = ntohl(sk->acked_seq);
2042                 t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2043                 t1->fin = 1;
2044                 t1->rst = need_reset;
2045                 t1->doff = sizeof(*t1)/4;
2046                 tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2047 
2048                 if (sk->wfront == NULL) {
2049                         prot->queue_xmit(sk, dev, buff, 0);
2050                 } else {
2051                         reset_timer(sk, TIME_WRITE, sk->rto);
2052                         buff->next = NULL;
2053                         if (sk->wback == NULL) {
2054                                 sk->wfront = buff;
2055                         } else {
2056                                 sk->wback->next = buff;
2057                         }
2058                         sk->wback = buff;
2059                         buff->magic = TCP_WRITE_QUEUE_MAGIC;
2060                 }
2061 
2062                 if (sk->state == TCP_CLOSE_WAIT) {
2063                         sk->state = TCP_FIN_WAIT2;
2064                 } else {
2065                         sk->state = TCP_FIN_WAIT1;
2066                 }
2067   }
2068   release_sock(sk);
2069 }
2070 
2071 
2072 /*
2073  * This routine takes stuff off of the write queue,
2074  * and puts it in the xmit queue.
2075  */
2076 static void
2077 tcp_write_xmit(struct sock *sk)
2078 {
2079   struct sk_buff *skb;
2080 
2081   DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));
2082 
2083   /* The bytes will have to remain here. In time closedown will
2084      empty the write queue and all will be happy */
2085   if(sk->zapped)
2086         return;
2087 
2088   while(sk->wfront != NULL &&
2089         before(sk->wfront->h.seq, sk->window_seq +1) &&
2090         (sk->retransmits == 0 ||
2091          sk->timeout != TIME_WRITE ||
2092          before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
2093         && sk->packets_out < sk->cong_window) {
2094                 skb = sk->wfront;
2095                 IS_SKB(skb);
2096                 sk->wfront = skb->next;
2097                 if (sk->wfront == NULL) sk->wback = NULL;
2098                 skb->next = NULL;
2099                 if (skb->magic != TCP_WRITE_QUEUE_MAGIC) {
2100                         printk("tcp.c skb with bad magic(%X) on write queue. Squashing "
2101                                 "queue\n", skb->magic);
2102                         sk->wfront = NULL;
2103                         sk->wback = NULL;
2104                         return;
2105                 }
2106                 skb->magic = 0;
2107                 DPRINTF((DBG_TCP, "Sending a packet.\n"));
2108 
2109                 /* See if we really need to send the packet. */
2110                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) {
2111                         sk->retransmits = 0;
2112                         kfree_skb(skb, FREE_WRITE);
2113                         if (!sk->dead) sk->write_space(sk);
2114                 } else {
2115                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2116                 }
2117         }
2118 }
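/*
 * A rough illustration of the gate on the loop above (numbers made up): a
 * queued skb goes out only while its h.seq lies inside the advertised
 * window (before(h.seq, window_seq + 1)), we are not still working through
 * a retransmission run, and fewer than cong_window packets are in flight.
 * With rcv_ack_seq = 1000 and a 4096-byte window (window_seq = 5096), only
 * skbs whose h.seq is at or below 5096 are eligible, and with
 * cong_window = 2 at most two of them are sent before the loop stops.
 */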
2119 
2120 
2121 /*
2122  * This routine sorts the send list, and resets the
2123  * sk->send_head and sk->send_tail pointers.
2124  */
2125 void
2126 sort_send(struct sock *sk)
2127 {
2128   struct sk_buff *list = NULL;
2129   struct sk_buff *skb,*skb2,*skb3;
2130 
2131   for (skb = sk->send_head; skb != NULL; skb = skb2) {
2132         skb2 = (struct sk_buff *)skb->link3;
2133         if (list == NULL || before (skb2->h.seq, list->h.seq)) {
2134                 skb->link3 = list;
2135                 sk->send_tail = skb;
2136                 list = skb;
2137         } else {
2138                 for (skb3 = list; ; skb3 = (struct sk_buff *)skb3->link3) {
2139                         if (skb3->link3 == NULL ||
2140                             before(skb->h.seq, skb3->link3->h.seq)) {
2141                                 skb->link3 = skb3->link3;
2142                                 skb3->link3 = skb;
2143                                 if (skb->link3 == NULL) sk->send_tail = skb;
2144                                 break;
2145                         }
2146                 }
2147         }
2148   }
2149   sk->send_head = list;
2150 }
2151   
2152 
2153 /* This routine deals with incoming acks, but not outgoing ones. */
2154 static int
2155 tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
2156 {
2157   unsigned long ack;
2158   int flag = 0;
2159   /*
2160    * 1 - there was data in the packet as well as the ack, or new data
2161    *     was sent, or we are in a shutdown state
2162    * 2 - data from the retransmit queue was acked and removed
2163    * 4 - window shrunk, or data from the retransmit queue was acked and removed
2164    */
2165 
2166   if(sk->zapped)
2167         return(1);      /* Dead, can't ack any more so why bother */
2168 
2169   ack = ntohl(th->ack_seq);
2170   DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
2171           "sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
2172           ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
2173 
2174   if (ntohs(th->window) > sk->max_window) {
2175         sk->max_window = ntohs(th->window);
2176         sk->mss = min(sk->max_window, sk->mtu);
2177   }
2178 
2179   if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2180         sk->retransmits = 0;
2181 
2182   if (after(ack, sk->send_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
2183         if (after(ack, sk->send_seq) ||
2184            (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
2185                 return(0);
2186         }
2187         if (sk->keepopen) {
2188                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2189         }
2190         return(1);
2191   }
2192 
2193   if (len != th->doff*4) flag |= 1;
2194 
2195   /* See if our window has been shrunk. */
2196   if (after(sk->window_seq, ack+ntohs(th->window))) {
2197         /*
2198          * We may need to move packets from the send queue
2199          * to the write queue, if the window has been shrunk on us.
2200          * The RFC says you are not allowed to shrink your window
2201          * like this, but if the other end does, you must be able
2202          * to deal with it.
2203          */
2204         struct sk_buff *skb;
2205         struct sk_buff *skb2;
2206         struct sk_buff *wskb = NULL;
2207   
2208         skb2 = sk->send_head;
2209         sk->send_head = NULL;
2210         sk->send_tail = NULL;
2211 
2212         flag |= 4;
2213 
2214         sk->window_seq = ack + ntohs(th->window);
2215         cli();
2216         while (skb2 != NULL) {
2217                 skb = skb2;
2218                 skb2 = (struct sk_buff *)skb->link3;
2219                 skb->link3 = NULL;
2220                 if (after(skb->h.seq, sk->window_seq)) {
2221                         if (sk->packets_out > 0) sk->packets_out--;
2222                         /* We may need to remove this from the dev send list. */
2223                         if (skb->next != NULL) {
2224                                 skb_unlink(skb);                                
2225                         }
2226                         /* Now add it to the write_queue. */
2227                         skb->magic = TCP_WRITE_QUEUE_MAGIC;
2228                         if (wskb == NULL) {
2229                                 skb->next = sk->wfront;
2230                                 sk->wfront = skb;
2231                         } else {
2232                                 skb->next = wskb->next;
2233                                 wskb->next = skb;
2234                         }
2235                         if (sk->wback == wskb) sk->wback = skb;
2236                         wskb = skb;
2237                 } else {
2238                         if (sk->send_head == NULL) {
2239                                 sk->send_head = skb;
2240                                 sk->send_tail = skb;
2241                         } else {
2242                                 sk->send_tail->link3 = skb;
2243                                 sk->send_tail = skb;
2244                         }
2245                         skb->link3 = NULL;
2246                 }
2247         }
2248         sti();
2249   }
2250 
2251   if (sk->send_tail == NULL || sk->send_head == NULL) {
2252         sk->send_head = NULL;
2253         sk->send_tail = NULL;
2254         sk->packets_out= 0;
2255   }
2256 
2257   sk->window_seq = ack + ntohs(th->window);
2258 
2259   /* We don't want too many packets out there. */
2260   if (sk->timeout == TIME_WRITE && 
2261       sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {
2262 /* 
2263  * This is Jacobson's slow start and congestion avoidance. 
2264  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2265  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2266  * counter and increment it once every cwnd times.  It's possible
2267  * that this should be done only if sk->retransmits == 0.  I'm
2268  * interpreting "new data is acked" as including data that has
2269  * been retransmitted but is just now being acked.
2270  */
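/*
 * Concretely (numbers made up): below ssthresh each qualifying ack grows
 * cong_window by one full mss (slow start).  Above it, with cong_window
 * at 8, the first eight acks only bump cong_count; the ninth finds
 * cong_count >= cong_window, grows the window to 9 and resets the counter,
 * which approximates cwnd += 1/cwnd per ack.
 */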
2271         if (sk->cong_window < sk->ssthresh)  
2272           /* in "safe" area, increase */
2273           sk->cong_window++;
2274         else {
2275           /* in dangerous area, increase slowly.  In theory this is
2276              sk->cong_window += 1 / sk->cong_window
2277            */
2278           if (sk->cong_count >= sk->cong_window) {
2279             sk->cong_window++;
2280             sk->cong_count = 0;
2281           } else 
2282             sk->cong_count++;
2283         }
2284   }
2285 
2286   DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
2287   sk->rcv_ack_seq = ack;
2288 
2289   /*
2290    * if this ack opens up a zero window, clear backoff.  It was
2291    * being used to time the probes, and is probably far higher than
2292    * it needs to be for normal retransmission
2293    */
2294   if (sk->timeout == TIME_PROBE0) {
2295         if (sk->wfront != NULL &&   /* should always be non-null */
2296             ! before (sk->window_seq, sk->wfront->h.seq)) {
2297           sk->retransmits = 0;
2298           sk->backoff = 0;
2299           /* recompute rto from rtt.  this eliminates any backoff */
2300           sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2301           if (sk->rto > 120*HZ)
2302             sk->rto = 120*HZ;
2303           if (sk->rto < 1*HZ)
2304             sk->rto = 1*HZ;
2305         }
2306   }
2307 
2308   /* See if we can take anything off of the retransmit queue. */
2309   while(sk->send_head != NULL) {
2310         /* Check for a bug. */
2311         if (sk->send_head->link3 &&
2312             after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
2313                 printk("INET: tcp.c: *** bug send_list out of order.\n");
2314                 sort_send(sk);
2315         }
2316 
2317         if (before(sk->send_head->h.seq, ack+1)) {
2318                 struct sk_buff *oskb;
2319 
2320                 if (sk->retransmits) {
2321 
2322                   /* we were retransmitting.  don't count this in RTT est */
2323                   flag |= 2;
2324 
2325                   /*
2326                    * even though we've gotten an ack, we're still
2327                    * retransmitting as long as we're sending from
2328                    * the retransmit queue.  Keeping retransmits non-zero
2329                    * prevents us from getting new data interspersed with
2330                    * retransmissions.
2331                    */
2332 
2333                   if (sk->send_head->link3)
2334                     sk->retransmits = 1;
2335                   else
2336                     sk->retransmits = 0;
2337 
2338                 }
2339 
2340                 /*
2341                  * Note that we only reset backoff and rto in the
2342                  * rtt recomputation code.  And that doesn't happen
2343                  * if there were retransmissions in effect.  So the
2344                  * first new packet after the retransmissions is
2345                  * sent with the backoff still in effect.  Not until
2346                  * we get an ack from a non-retransmitted packet do
2347                  * we reset the backoff and rto.  This allows us to deal
2348                  * with a situation where the network delay has increased
2349                  * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2350                  */
2351 
2352                 /* We have one less packet out there. */
2353                 if (sk->packets_out > 0) sk->packets_out --;
2354                 DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
2355                                 sk->send_head, sk->send_head->h.seq, ack));
2356 
2357                 /* Wake up the process, it can probably write more. */
2358                 if (!sk->dead) sk->write_space(sk);
2359 
2360                 oskb = sk->send_head;
2361 
2362                 if (!(flag&2)) {
2363                   long m;
2364 
2365                   /* The following amusing code comes from Jacobson's
2366                    * article in SIGCOMM '88.  Note that rtt and mdev
2367                    * are scaled versions of rtt and mean deviation.
2368                    * This is designed to be as fast as possible.
2369                    * m stands for "measurement".
2370                    */
2371 
2372                   m = jiffies - oskb->when;  /* RTT */
2373                   m -= (sk->rtt >> 3);       /* m is now error in rtt est */
2374                   sk->rtt += m;              /* rtt = 7/8 rtt + 1/8 new */
2375                   if (m < 0)
2376                     m = -m;                  /* m is now abs(error) */
2377                   m -= (sk->mdev >> 2);      /* similar update on mdev */
2378                   sk->mdev += m;             /* mdev = 3/4 mdev + 1/4 new */
2379 
2380                   /* now update timeout.  Note that this removes any backoff */
2381                   sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2382                   if (sk->rto > 120*HZ)
2383                     sk->rto = 120*HZ;
2384                   if (sk->rto < 1*HZ)
2385                     sk->rto = 1*HZ;
2386                   sk->backoff = 0;
2387 
2388                 }
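/*
 * Put as a worked equation: sk->rtt holds roughly 8 times the smoothed RTT
 * and sk->mdev roughly 4 times the smoothed deviation, so
 *     rto = ((rtt >> 2) + mdev) >> 1
 *         = (2*srtt + 4*dev) / 2
 *         = srtt + 2*dev,
 * which the code just below clamps to the range [1*HZ, 120*HZ].  With a
 * smoothed RTT of 30 jiffies and a deviation of 5, rto comes out at 40
 * jiffies; the figures only illustrate the scaling.
 */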
2389                 flag |= (2|4);
2390 
2391                 cli();
2392 
2393                 oskb = sk->send_head;
2394                 IS_SKB(oskb);
2395                 sk->send_head =(struct sk_buff *)oskb->link3;
2396                 if (sk->send_head == NULL) {
2397                         sk->send_tail = NULL;
2398                 }
2399 
2400                 /* We may need to remove this from the dev send list. */                
2401                 skb_unlink(oskb);       /* Much easier! */
2402                 sti();
2403                 oskb->magic = 0;
2404                 kfree_skb(oskb, FREE_WRITE); /* write. */
2405                 if (!sk->dead) sk->write_space(sk);
2406         } else {
2407                 break;
2408         }
2409   }
2410 
2411   /*
2412    * Maybe we can take some stuff off of the write queue,
2413    * and put it onto the xmit queue.
2414    */
2415   if (sk->wfront != NULL) {
2416         if (after (sk->window_seq+1, sk->wfront->h.seq) &&
2417                 (sk->retransmits == 0 || 
2418                  sk->timeout != TIME_WRITE ||
2419                  before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
2420                 && sk->packets_out < sk->cong_window) {
2421                 flag |= 1;
2422                 tcp_write_xmit(sk);
2423         } else if (before(sk->window_seq, sk->wfront->h.seq) &&
2424                    sk->send_head == NULL &&
2425                    sk->ack_backlog == 0 &&
2426                    sk->state != TCP_TIME_WAIT) {
2427                 reset_timer(sk, TIME_PROBE0, sk->rto);
2428         }               
2429   } else {
2430         if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2431             sk->state != TCP_TIME_WAIT && !sk->keepopen) {
2432                 DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n")); 
2433                 if (!sk->dead) sk->write_space(sk);
2434 
2435                 if (sk->keepopen)
2436                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2437                 else
2438                         delete_timer(sk);
2439         } else {
2440                 if (sk->state != (unsigned char) sk->keepopen) {
2441                         reset_timer(sk, TIME_WRITE, sk->rto);
2442                 }
2443                 if (sk->state == TCP_TIME_WAIT) {
2444                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2445                 }
2446         }
2447   }
2448 
2449   if (sk->packets_out == 0 && sk->partial != NULL &&
2450       sk->wfront == NULL && sk->send_head == NULL) {
2451         flag |= 1;
2452         tcp_send_partial(sk);
2453   }
2454 
2455   /* See if we are done. */
2456   if (sk->state == TCP_TIME_WAIT) {
2457         if (!sk->dead)
2458                 sk->state_change(sk);
2459         if (sk->rcv_ack_seq == sk->send_seq && sk->acked_seq == sk->fin_seq) {
2460                 flag |= 1;
2461                 sk->state = TCP_CLOSE;
2462                 sk->shutdown = SHUTDOWN_MASK;
2463         }
2464   }
2465 
2466   if (sk->state == TCP_LAST_ACK || sk->state == TCP_FIN_WAIT2) {
2467         if (!sk->dead) sk->state_change(sk);
2468         if (sk->rcv_ack_seq == sk->send_seq) {
2469                 flag |= 1;
2470                 if (sk->acked_seq != sk->fin_seq) {
2471                         tcp_time_wait(sk);
2472                 } else {
2473                         DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk));
2474                         tcp_send_ack(sk->send_seq, sk->acked_seq, sk,
2475                                      th, sk->daddr);
2476                         sk->shutdown = SHUTDOWN_MASK;
2477                         sk->state = TCP_CLOSE;
2478                 }
2479         }
2480   }
2481 
2482 /*
2483  * I make no guarantees about the first clause in the following
2484  * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2485  * what conditions "!flag" would be true.  However I think the rest
2486  * of the conditions would prevent that from causing any
2487  * unnecessary retransmission. 
2488  *   Clearly if the first packet has expired it should be 
2489  * retransmitted.  The other alternative, "flag&2 && retransmits", is
2490  * harder to explain:  You have to look carefully at how and when the
2491  * timer is set and with what timeout.  The most recent transmission always
2492  * sets the timer.  So in general if the most recent thing has timed
2493  * out, everything before it has as well.  So we want to go ahead and
2494  * retransmit some more.  If we didn't explicitly test for this
2495  * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2496  * would not be true.  If you look at the pattern of timing, you can
2497  * show that rto is increased fast enough that the next packet would
2498  * almost never be retransmitted immediately.  Then you'd end up
2499  * waiting for a timeout to send each packet on the retransmission
2500  * queue.  With my implementation of the Karn sampling algorithm,
2501  * the timeout would double each time.  The net result is that it would
2502  * take a hideous amount of time to recover from a single dropped packet.
2503  * It's possible that there should also be a test for TIME_WRITE, but
2504  * I think as long as "send_head != NULL" and "retransmit" is on, we've
2505  * got to be in real retransmission mode.
2506  *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2507  * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2508  * As long as no further losses occur, this seems reasonable.
2509  */
2510 
2511   if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2512       (((flag&2) && sk->retransmits) ||
2513        (sk->send_head->when + sk->rto < jiffies))) {
2514         ip_do_retransmit(sk, 1);
2515         reset_timer(sk, TIME_WRITE, sk->rto);
2516       }
2517 
2518   DPRINTF((DBG_TCP, "leaving tcp_ack\n"));
2519   return(1);
2520 }
2521 
2522 
2523 /*
2524  * This routine handles the data.  If there is room in the buffer,
2525  * it will have already been moved into it.  If there is no
2526  * room, then we will just have to discard the packet.
2527  */
2528 static int
2529 tcp_data(struct sk_buff *skb, struct sock *sk, 
2530          unsigned long saddr, unsigned short len)
2531 {
2532   struct sk_buff *skb1, *skb2;
2533   struct tcphdr *th;
2534   int dup_dumped=0;
2535 
2536   th = skb->h.th;
2537   print_th(th);
2538   skb->len = len -(th->doff*4);
2539 
2540   DPRINTF((DBG_TCP, "tcp_data len = %d sk = %X:\n", skb->len, sk));
2541 
2542   sk->bytes_rcv += skb->len;
2543   if (skb->len == 0 && !th->fin && !th->urg && !th->psh) {
2544         /* Don't want to keep passing ack's back and forth. */
2545         if (!th->ack) tcp_send_ack(sk->send_seq, sk->acked_seq,sk, th, saddr);
2546         kfree_skb(skb, FREE_READ);
2547         return(0);
2548   }
2549 
2550   if (sk->shutdown & RCV_SHUTDOWN) {
2551         sk->acked_seq = th->seq + skb->len + th->syn + th->fin;
2552         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2553         sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2554         sk->state = TCP_CLOSE;
2555         sk->err = EPIPE;
2556         sk->shutdown = SHUTDOWN_MASK;
2557         DPRINTF((DBG_TCP, "tcp_data: closing socket - %X\n", sk));
2558         kfree_skb(skb, FREE_READ);
2559         if (!sk->dead) sk->state_change(sk);
2560         return(0);
2561   }
2562 
2563   /*
2564    * Now we have to walk the chain, and figure out where this one
2565    * goes into it.  This is set up so that the last packet we received
2566    * will be the first one we look at, that way if everything comes
2567    * in order, there will be no performance loss, and if they come
2568    * out of order we will be able to fit things in nicely.
2569    */
2570 
2571   /* This should start at the last one, and then go around forwards. */
2572   if (sk->rqueue == NULL) {
2573         DPRINTF((DBG_TCP, "tcp_data: skb = %X:\n", skb));
2574 #ifdef OLDWAY
2575         sk->rqueue = skb;
2576         skb->next = skb;
2577         skb->prev = skb;
2578         skb->list = &sk->rqueue;
2579 #else
2580         skb_queue_head(&sk->rqueue,skb);
2581 #endif          
2582         skb1= NULL;
2583   } else {
2584         DPRINTF((DBG_TCP, "tcp_data adding to chain sk = %X:\n", sk));
2585         for(skb1=sk->rqueue->prev; ; skb1 =(struct sk_buff *)skb1->prev) {
2586                 if(sk->debug)
2587                 {
2588                         printk("skb1=%p :", skb1);
2589                         printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
2590                         printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
2591                         printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
2592                                         sk->acked_seq);
2593                 }
2594 #ifdef OLD              
2595                 if (after(th->seq+1, skb1->h.th->seq)) {
2596                         skb->prev = skb1;
2597                         skb->next = skb1->next;
2598                         skb->next->prev = skb;
2599                         skb1->next = skb;
2600                         if (skb1 == sk->rqueue) sk->rqueue = skb;
2601                         break;
2602                 }
2603                 if (skb1->prev == sk->rqueue) {
2604                         skb->next= skb1;
2605                         skb->prev = skb1->prev;
2606                         skb->prev->next = skb;
2607                         skb1->prev = skb;
2608                         skb1 = NULL; /* so we know we might be able
2609                                         to ack stuff. */
2610                         break;
2611                 }
2612 #else
2613                 if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
2614                 {
2615                         skb_append(skb1,skb);
2616                         skb_unlink(skb1);
2617                         kfree_skb(skb1,FREE_READ);
2618                         dup_dumped=1;
2619                         skb1=NULL;
2620                         break;
2621                 }
2622                 if (after(th->seq+1, skb1->h.th->seq))
2623                 {
2624                         skb_append(skb1,skb);
2625                         break;
2626                 }
2627                 if (skb1 == sk->rqueue)
2628                 {
2629                         skb_queue_head(&sk->rqueue, skb);               
2630                         break;
2631                 }
2632 #endif          
2633         }
2634         DPRINTF((DBG_TCP, "skb = %X:\n", skb));
2635   }
2636 
2637   th->ack_seq = th->seq + skb->len;
2638   if (th->syn) th->ack_seq++;
2639   if (th->fin) th->ack_seq++;
2640 
2641   if (before(sk->acked_seq, sk->copied_seq)) {
2642         printk("*** tcp.c:tcp_data bug acked < copied\n");
2643         sk->acked_seq = sk->copied_seq;
2644   }
2645 
2646   /* Now figure out if we can ack anything. */
2647   if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) {
2648       if (before(th->seq, sk->acked_seq+1)) {
2649                 int newwindow;
2650 
2651                 if (after(th->ack_seq, sk->acked_seq)) {
2652                         newwindow = sk->window -
2653                                        (th->ack_seq - sk->acked_seq);
2654                         if (newwindow < 0)
2655                                 newwindow = 0;  
2656                         sk->window = newwindow;
2657                         sk->acked_seq = th->ack_seq;
2658                 }
2659                 skb->acked = 1;
2660 
2661                 /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */
2662                 if (skb->h.th->fin) {
2663                         if (!sk->dead) sk->state_change(sk);
2664                         sk->shutdown |= RCV_SHUTDOWN;
2665                 }
2666           
2667                 for(skb2 = (struct sk_buff *)skb->next;
2668                     skb2 !=(struct sk_buff *) sk->rqueue;
2669                     skb2 = (struct sk_buff *)skb2->next) {
2670                         if (before(skb2->h.th->seq, sk->acked_seq+1)) {
2671                                 if (after(skb2->h.th->ack_seq, sk->acked_seq))
2672                                 {
2673                                         newwindow = sk->window -
2674                                          (skb2->h.th->ack_seq - sk->acked_seq);
2675                                         if (newwindow < 0)
2676                                                 newwindow = 0;  
2677                                         sk->window = newwindow;
2678                                         sk->acked_seq = skb2->h.th->ack_seq;
2679                                 }
2680                                 skb2->acked = 1;
2681 
2682                                 /*
2683                                  * When we ack the fin, we turn on
2684                                  * the RCV_SHUTDOWN flag.
2685                                  */
2686                                 if (skb2->h.th->fin) {
2687                                         sk->shutdown |= RCV_SHUTDOWN;
2688                                         if (!sk->dead) sk->state_change(sk);
2689                                 }
2690 
2691                                 /* Force an immediate ack. */
2692                                 sk->ack_backlog = sk->max_ack_backlog;
2693                         } else {
2694                                 break;
2695                         }
2696                 }
2697 
2698                 /*
2699                  * This also takes care of updating the window.
2700                  * This if statement needs to be simplified.
2701                  */
2702                 if (!sk->delay_acks ||
2703                     sk->ack_backlog >= sk->max_ack_backlog || 
2704                     sk->bytes_rcv > sk->max_unacked || th->fin) {
2705 /*                      tcp_send_ack(sk->send_seq, sk->acked_seq,sk,th, saddr); */
2706                 } else {
2707                         sk->ack_backlog++;
2708                         if(sk->debug)
2709                                 printk("Ack queued.\n");
2710                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2711                 }
2712         }
2713   }
2714 
2715   /*
2716    * If we've missed a packet, send an ack.
2717    * Also start a timer to send another.
2718    */
2719   if (!skb->acked) {
2720         /*
2721          * This is important.  If we don't have much room left,
2722          * we need to throw out a few packets so we have a good
2723          * window.  Note that mtu is used, not mss, because mss is really
2724          * for the send side.  He could be sending us stuff as large as mtu.
2725          */
2726         while (sk->prot->rspace(sk) < sk->mtu) {
2727                 skb1 = skb_peek(&sk->rqueue);
2728                 if (skb1 == NULL) {
2729                         printk("INET: tcp.c:tcp_data memory leak detected.\n");
2730                         break;
2731                 }
2732 
2733                 /* Don't throw out something that has been acked. */
2734                 if (skb1->acked) {
2735                         break;
2736                 }
2737                 
2738                 skb_unlink(skb1);
2739 #ifdef OLDWAY           
2740                 if (skb1->prev == skb1) {
2741                         sk->rqueue = NULL;
2742                 } else {
2743                         sk->rqueue = (struct sk_buff *)skb1->prev;
2744                         skb1->next->prev = skb1->prev;
2745                         skb1->prev->next = skb1->next;
2746                 }
2747 #endif          
2748                 kfree_skb(skb1, FREE_READ);
2749         }
2750         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
2751         sk->ack_backlog++;
2752         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2753   } else {
2754         /* We missed a packet.  Send an ack to try to resync things. */
2755         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
2756   }
2757 
2758   /* Now tell the user we may have some data. */
2759   if (!sk->dead) {
2760         if(sk->debug)
2761                 printk("Data wakeup.\n");
2762         sk->data_ready(sk,0);
2763   } else {
2764         DPRINTF((DBG_TCP, "data received on dead socket.\n"));
2765   }
2766 
2767   if (sk->state == TCP_FIN_WAIT2 &&
2768       sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->send_seq) {
2769         DPRINTF((DBG_TCP, "tcp_data: entering last_ack state sk = %X\n", sk));
2770 
2771 /*      tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr); */
2772         sk->shutdown = SHUTDOWN_MASK;
2773         sk->state = TCP_LAST_ACK;
2774         if (!sk->dead) sk->state_change(sk);
2775   }
2776 
2777   return(0);
2778 }
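/*
 * A small example of the window bookkeeping above (numbers made up): if
 * sk->window was 4096 and an in-order segment just acknowledged 512 new
 * bytes of data, the code shrinks the advertised window to
 *     newwindow = 4096 - 512 = 3584
 * and it stays reduced until the window is next recomputed (see
 * tcp_select_window()).
 */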
2779 
2780 
2781 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
2782 {
2783         unsigned long ptr = ntohs(th->urg_ptr);
2784 
2785         if (ptr)
2786                 ptr--;
2787         ptr += th->seq;
2788 
2789         /* ignore urgent data that we've already seen and read */
2790         if (after(sk->copied_seq+1, ptr))
2791                 return;
2792 
2793         /* do we already have a newer (or duplicate) urgent pointer? */
2794         if (sk->urg_data && !after(ptr, sk->urg_seq))
2795                 return;
2796 
2797         /* tell the world about our new urgent pointer */
2798         if (sk->proc != 0) {
2799                 if (sk->proc > 0) {
2800                         kill_proc(sk->proc, SIGURG, 1);
2801                 } else {
2802                         kill_pg(-sk->proc, SIGURG, 1);
2803                 }
2804         }
2805         sk->urg_data = URG_NOTYET;
2806         sk->urg_seq = ptr;
2807 }
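/*
 * The arithmetic above in numbers (made up): a segment with th->seq = 1000
 * and urg_ptr = 5 gives ptr = 1000 + 5 - 1 = 1004, so sk->urg_seq marks the
 * fifth data byte of that segment as urgent; tcp_urg() below then finds it
 * inside the skb at offset urg_seq - th->seq + th->doff*4.
 */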
2808 
2809 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
2810         unsigned long saddr, unsigned long len)
2811 {
2812         unsigned long ptr;
2813 
2814         /* check if we get a new urgent pointer */
2815         if (th->urg)
2816                 tcp_check_urg(sk,th);
2817 
2818         /* do we wait for any urgent data? */
2819         if (sk->urg_data != URG_NOTYET)
2820                 return 0;
2821 
2822         /* is the urgent pointer pointing into this packet? */
2823         ptr = sk->urg_seq - th->seq + th->doff*4;
2824         if (ptr >= len)
2825                 return 0;
2826 
2827         /* ok, got the correct packet, update info */
2828         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
2829         if (!sk->dead)
2830                 wake_up_interruptible(sk->sleep);
2831         return 0;
2832 }
2833 
2834 
2835 /* This deals with incoming fins. 'Linus at 9 O'clock' 8-) */
2836 static int
2837 tcp_fin(struct sock *sk, struct tcphdr *th, 
2838          unsigned long saddr, struct device *dev)
2839 {
2840   DPRINTF((DBG_TCP, "tcp_fin(sk=%X, th=%X, saddr=%X, dev=%X)\n",
2841                                                 sk, th, saddr, dev));
2842   
2843   if (!sk->dead) {
2844         sk->state_change(sk);
2845   }
2846 
2847   switch(sk->state) {
2848         case TCP_SYN_RECV:
2849         case TCP_SYN_SENT:
2850         case TCP_ESTABLISHED:
2851                 /* Contains the one that needs to be acked */
2852                 sk->fin_seq = th->seq+1;
2853                 sk->state = TCP_CLOSE_WAIT;
2854                 if (th->rst) sk->shutdown = SHUTDOWN_MASK;
2855                 break;
2856 
2857         case TCP_CLOSE_WAIT:
2858         case TCP_FIN_WAIT2:
2859                 break; /* we got a retransmit of the fin. */
2860 
2861         case TCP_FIN_WAIT1:
2862                 /* Contains the one that needs to be acked */
2863                 sk->fin_seq = th->seq+1;
2864                 sk->state = TCP_FIN_WAIT2;
2865                 break;
2866 
2867         default:
2868         case TCP_TIME_WAIT:
2869                 sk->state = TCP_LAST_ACK;
2870 
2871                 /* Start the timers. */
2872                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2873                 return(0);
2874   }
2875   sk->ack_backlog++;
2876 
2877   return(0);
2878 }
2879 
2880 
2881 /* This will accept the next outstanding connection. */
2882 static struct sock *
2883 tcp_accept(struct sock *sk, int flags)
2884 {
2885   struct sock *newsk;
2886   struct sk_buff *skb;
2887   
2888   DPRINTF((DBG_TCP, "tcp_accept(sk=%X, flags=%X, addr=%s)\n",
2889                                 sk, flags, in_ntoa(sk->saddr)));
2890 
2891   /*
2892    * We need to make sure that this socket is listening,
2893    * and that it has something pending.
2894    */
2895   if (sk->state != TCP_LISTEN) {
2896         sk->err = EINVAL;
2897         return(NULL); 
2898   }
2899 
2900   /* avoid the race. */
2901   cli();
2902   sk->inuse = 1;
2903   while((skb = get_firstr(sk)) == NULL) {
2904         if (flags & O_NONBLOCK) {
2905                 sti();
2906                 release_sock(sk);
2907                 sk->err = EAGAIN;
2908                 return(NULL);
2909         }
2910 
2911         release_sock(sk);
2912         interruptible_sleep_on(sk->sleep);
2913         if (current->signal & ~current->blocked) {
2914                 sti();
2915                 sk->err = ERESTARTSYS;
2916                 return(NULL);
2917         }
2918         sk->inuse = 1;
2919   }
2920   sti();
2921 
2922   /* Now all we need to do is return skb->sk. */
2923   newsk = skb->sk;
2924 
2925   kfree_skb(skb, FREE_READ);
2926   sk->ack_backlog--;
2927   release_sock(sk);
2928   return(newsk);
2929 }
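
     /*
      * For reference, a minimal user level sketch of the loop above
      * (illustrative only, kept out of the build with #if 0): with
      * O_NONBLOCK set the kernel path bails out with sk->err = EAGAIN,
      * which ultimately shows up in user space as accept() failing with
      * errno EAGAIN; a blocking caller simply sleeps until
      * tcp_conn_request() has queued a completed connection.
      */
     #if 0
     #include <sys/types.h>
     #include <sys/socket.h>
     #include <netinet/in.h>
     #include <errno.h>

     static int wait_for_client(int listen_fd)
     {
             struct sockaddr_in peer;
             int len = sizeof(peer);
             int fd;

             fd = accept(listen_fd, (struct sockaddr *) &peer, &len);
             if (fd < 0 && errno == EAGAIN)
                     return -1;      /* non-blocking listener, nothing queued yet */
             return fd;              /* new connected socket, or -1 on other errors */
     }
     #endif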
2930 
2931 
2932 /* This will initiate an outgoing connection. */
2933 static int
2934 tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
2935 {
2936   struct sk_buff *buff;
2937   struct sockaddr_in sin;
2938   struct device *dev=NULL;
2939   unsigned char *ptr;
2940   int tmp;
2941   struct tcphdr *t1;
2942   int err;
2943 
2944   if (sk->state != TCP_CLOSE) return(-EISCONN);
2945   if (addr_len < 8) return(-EINVAL);
2946 
2947   err=verify_area(VERIFY_READ, usin, addr_len);
2948   if(err)
2949         return err;
2950         
2951   memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len));
2952 
2953   if (sin.sin_family && sin.sin_family != AF_INET) return(-EAFNOSUPPORT);
2954 
2955   DPRINTF((DBG_TCP, "TCP connect daddr=%s\n", in_ntoa(sin.sin_addr.s_addr)));
2956   
2957   /* Don't want a TCP connection going to a broadcast address */
2958   if (chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) { 
2959         DPRINTF((DBG_TCP, "TCP connection to broadcast address not allowed\n"));
2960         return(-ENETUNREACH);
2961   }
2962   
2963   /* Connecting back to the same socket blows up, so disallow it. */
2964   if(sk->saddr == sin.sin_addr.s_addr && sk->num==ntohs(sin.sin_port))
2965         return -EBUSY;
2966 
2967   sk->inuse = 1;
2968   sk->daddr = sin.sin_addr.s_addr;
2969   sk->send_seq = jiffies * SEQ_TICK - seq_offset;
2970   sk->window_seq = sk->send_seq;
2971   sk->rcv_ack_seq = sk->send_seq -1;
2972   sk->err = 0;
2973   sk->dummy_th.dest = sin.sin_port;
2974   release_sock(sk);
2975 
2976   buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
2977   if (buff == NULL) {
2978         return(-ENOMEM);
2979   }
2980   sk->inuse = 1;
2981   buff->mem_addr = buff;
2982   buff->mem_len = MAX_SYN_SIZE;
2983   buff->len = 24;
2984   buff->sk = sk;
2985   buff->free = 1;
2986   t1 = (struct tcphdr *) buff->data;
2987 
2988   /* Put in the IP header and routing stuff. */
2989   /* We need to build the routing stuff from the things saved in skb. */
2990   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
2991                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2992   if (tmp < 0) {
2993         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
2994         release_sock(sk);
2995         return(-ENETUNREACH);
2996   }
2997   buff->len += tmp;
2998   t1 = (struct tcphdr *)((char *)t1 +tmp);
2999 
3000   memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3001   t1->seq = ntohl(sk->send_seq++);
3002   buff->h.seq = sk->send_seq;
3003   t1->ack = 0;
3004   t1->window = 2;
3005   t1->res1=0;
3006   t1->res2=0;
3007   t1->rst = 0;
3008   t1->urg = 0;
3009   t1->psh = 0;
3010   t1->syn = 1;
3011   t1->urg_ptr = 0;
3012   t1->doff = 6;
3013 
3014 /* Use the MSS the user asked for, otherwise pick a default below. */
3015   if (sk->user_mss)
3016     sk->mtu = sk->user_mss;
3017   else {
3018 #ifdef SUBNETSARELOCAL
3019     if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3020 #else
3021     if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3022 #endif
3023       sk->mtu = 576 - HEADER_SIZE;
3024     else
3025       sk->mtu = MAX_WINDOW;
3026   }
3027 /* but not bigger than device MTU */
3028   sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3029 
3030   /* Put in the TCP option that announces our MSS. */
3031   ptr = (unsigned char *)(t1+1);
3032   ptr[0] = 2;
3033   ptr[1] = 4;
3034   ptr[2] = (sk->mtu) >> 8;
3035   ptr[3] = (sk->mtu) & 0xff;
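     /*
      * The four bytes just stored form the TCP maximum segment size option:
      * kind 2, length 4, then the MSS in network byte order.  For example,
      * sk->mtu == 1460 yields the bytes 02 04 05 b4.  t1->doff was set to 6
      * above (24 bytes), covering the 20 byte header plus this option, and
      * the checksum below is computed over those same 24 bytes.
      */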
3036   tcp_send_check(t1, sk->saddr, sk->daddr,
3037                   sizeof(struct tcphdr) + 4, sk);
3038 
3039   /* This must go first otherwise a really quick response will get reset. */
3040   sk->state = TCP_SYN_SENT;
3041   sk->rtt = TCP_CONNECT_TIME;
3042   reset_timer(sk, TIME_WRITE, TCP_CONNECT_TIME);        /* Timer for repeating the SYN until an answer */
3043   sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3044 
3045   sk->prot->queue_xmit(sk, dev, buff, 0);  
3046   
3047   release_sock(sk);
3048   return(0);
3049 }
3050 
3051 
3052 /* This function checks to see if the tcp header is actually acceptable. */
3053 static int
3054 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
3055              struct options *opt, unsigned long saddr, struct device *dev)
3056 {
3057   /*
3058    * This isn't quite right.  sk->acked_seq could be more recent
3059    * than sk->window.  This is however close enough.  We will accept
3060    * slightly more packets than we should, but it should not cause
3061    * problems unless someone is trying to forge packets.
3062    */
3063   DPRINTF((DBG_TCP, "tcp_sequence(sk=%X, th=%X, len = %d, opt=%d, saddr=%X)\n",
3064           sk, th, len, opt, saddr));
3065 
3066   if (between(th->seq, sk->acked_seq, sk->acked_seq + sk->window)||
3067       between(th->seq + len-(th->doff*4), sk->acked_seq + 1,
3068               sk->acked_seq + sk->window) ||
3069      (before(th->seq, sk->acked_seq) &&
3070        after(th->seq + len -(th->doff*4), sk->acked_seq + sk->window))) {
3071        return(1);
3072    }
3073   DPRINTF((DBG_TCP, "tcp_sequence: rejecting packet.\n"));
3074 
3075   /*
3076    *    Send a reset if we get something not ours and we are
3077    *    unsynchronized. Note: We don't do anything to our end. We
3078    *    are just killing the bogus remote connection; then we will
3079    *    connect again and it will work (with luck).
3080    */
3081          
3082   if(sk->state==TCP_SYN_SENT||sk->state==TCP_SYN_RECV)
3083   {
3084         tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3085         return(1);
3086   }
3087 
3088   /*
3089    * If it's too far ahead, send an ack to let the
3090    * other end know what we expect.
3091    */
3092   if (after(th->seq, sk->acked_seq + sk->window)) {
3093         if(!th->rst)
3094                 tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
3095         return(0);
3096   }
3097 
3098 #ifdef undef
3099 /*
3100  * if we do this, we won't respond to keepalive packets, since those
3101  * are slightly out of window and would be accepted here without
3102  * generating an ack; a late ack is only let through if its sequence
3103  * number is not less than one we've seen before.  Berkeley doesn't
3104  * seem to do this, but it's always hard to be sure.
3105  */
3106   /* In case it's just a late ack, let it through. */
3107   if (th->ack && len == (th->doff * 4) &&
3108       after(th->seq, sk->acked_seq - 32767) &&
3109       !th->fin && !th->syn) return(1);
3110 #endif
3111 
3112   if (!th->rst) {
3113         /* Try to resync things. */
3114         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
3115   }
3116   return(0);
3117 }
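
     /*
      * A worked example of the acceptance test above: with sk->acked_seq ==
      * 5000 and sk->window == 4096 the window of interest is roughly
      * [5000, 9096].  A segment is accepted when its first byte lands in
      * that range, when its last byte does (e.g. seq 4800 with 400 bytes of
      * data, which ends inside the window), or when it starts before 5000
      * and ends beyond 9096, spanning the whole window.  Anything else
      * draws a resynchronising ack (suppressed for rst segments); while we
      * are still unsynchronised (SYN_SENT/SYN_RECV) the bogus segment is
      * answered with a reset instead.
      */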
3118 
3119 
3120 
3121 
3122 
3123 int
3124 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
3125         unsigned long daddr, unsigned short len,
3126         unsigned long saddr, int redo, struct inet_protocol * protocol)
3127 {
3128   struct tcphdr *th;
3129   struct sock *sk;
3130 
3131   if (!skb) {
3132         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL\n"));
3133         return(0);
3134   }
3135 #if 0   /* FIXME: it's ok for protocol to be NULL */
3136   if (!protocol) {
3137         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv protocol = NULL\n"));
3138         return(0);
3139   }
3140 
3141   if (!opt) {   /* FIXME: it's ok for opt to be NULL */
3142         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv opt = NULL\n"));
3143   }
3144 #endif
3145   if (!dev) {
3146         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL\n"));
3147         return(0);
3148   }
3149   th = skb->h.th;
3150 
3151   /* Find the socket. */
3152   sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3153   DPRINTF((DBG_TCP, "<<\n"));
3154   DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X\n", len, redo, skb));
3155   
3156   /* If this socket has got a reset it's to all intents and purposes
3157      really dead */
3158   if (sk!=NULL && sk->zapped)
3159         sk=NULL;
3160 
3161   if (sk) {
3162          DPRINTF((DBG_TCP, "sk = %X:\n", sk));
3163   }
3164 
3165   if (!redo) {
3166         if (tcp_check(th, len, saddr, daddr )) {
3167                 skb->sk = NULL;
3168                 DPRINTF((DBG_TCP, "packet dropped with bad checksum.\n"));
3169 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: bad checksum\n");
3170                 kfree_skb(skb,FREE_READ);
3171                 /*
3172                  * We don't release the socket because it was
3173                  * never marked in use.
3174                  */
3175                 return(0);
3176         }
3177 
3178         /* See if we know about the socket. */
3179         if (sk == NULL) {
3180                 if (!th->rst) 
3181                 {       
3182                         th->seq = ntohl(th->seq);
3183                         /* So reset is always called with th->seq in host order */
3184                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3185                 }
3186                 skb->sk = NULL;
3187                 kfree_skb(skb, FREE_READ);
3188                 return(0);
3189         }
3190 
3191         skb->len = len;
3192         skb->sk = sk;
3193         skb->acked = 0;
3194         skb->used = 0;
3195         skb->free = 0;
3196         skb->saddr = daddr;
3197         skb->daddr = saddr;
3198 
3199         th->seq = ntohl(th->seq);
3200 
3201         /* We may need to add it to the backlog here. */
3202         cli();
3203         if (sk->inuse) {
3204                 if (sk->back_log == NULL) {
3205                         sk->back_log = skb;
3206                         skb->next = skb;
3207                         skb->prev = skb;
3208                 } else {
3209                         skb->next = sk->back_log;
3210                         skb->prev = sk->back_log->prev;
3211                         skb->prev->next = skb;
3212                         skb->next->prev = skb;
3213                 }
3214                 sti();
3215                 return(0);
3216         }
3217         sk->inuse = 1;
3218         sti();
3219   } else {
3220         if (!sk) {
3221                 DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1\n"));
3222                 return(0);
3223         }
3224   }
3225 
3226   if (!sk->prot) {
3227         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL \n"));
3228         return(0);
3229   }
3230 
3231   /* Charge the memory to the socket. */
3232   if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
3233         skb->sk = NULL;
3234         DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.\n"));
3235         kfree_skb(skb, FREE_READ);
3236         release_sock(sk);
3237         return(0);
3238   }
3239   sk->rmem_alloc += skb->mem_len;
3240 
3241   DPRINTF((DBG_TCP, "About to do switch.\n"));
3242 
3243   /* Now deal with it. */
3244   switch(sk->state) {
3245         /*
3246          * This should shut the connection down if it's waiting
3247          * for an ack that is never going to be sent.
3248          */
3249         case TCP_LAST_ACK:
3250                 if (th->rst) {
3251                         sk->zapped=1;
3252                         sk->err = ECONNRESET;
3253                         sk->state = TCP_CLOSE;
3254                         sk->shutdown = SHUTDOWN_MASK;
3255                         if (!sk->dead) {
3256                                 sk->state_change(sk);
3257                         }
3258                         kfree_skb(skb, FREE_READ);
3259                         release_sock(sk);
3260                         return(0);
3261                 }
3262 
3263         case TCP_ESTABLISHED:
3264         case TCP_CLOSE_WAIT:
3265         case TCP_FIN_WAIT1:
3266         case TCP_FIN_WAIT2:
3267         case TCP_TIME_WAIT:
3268                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3269 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: not in seq\n");
3270 #ifdef undef
3271 /* nice idea, but tcp_sequence already does this.  Maybe it shouldn't?? */
3272                         if(!th->rst)
3273                                 tcp_send_ack(sk->send_seq, sk->acked_seq, 
3274                                      sk, th, saddr);
3275 #endif
3276                         kfree_skb(skb, FREE_READ);
3277                         release_sock(sk);
3278                         return(0);
3279                 }
3280 
3281                 if (th->rst) {
3282                         sk->zapped=1;
3283                         /* This means the thing should really be closed. */
3284                         sk->err = ECONNRESET;
3285 
3286                         if (sk->state == TCP_CLOSE_WAIT) {
3287                                 sk->err = EPIPE;
3288                         }
3289 
3290                         /*
3291                          * A reset with a fin just means that
3292                          * the data was not all read.
3293                          */
3294                         sk->state = TCP_CLOSE;
3295                         sk->shutdown = SHUTDOWN_MASK;
3296                         if (!sk->dead) {
3297                                 sk->state_change(sk);
3298                         }
3299                         kfree_skb(skb, FREE_READ);
3300                         release_sock(sk);
3301                         return(0);
3302                 }
3303                 if (
3304 #if 0
3305                 if ((opt && (opt->security != 0 ||
3306                             opt->compartment != 0)) || 
3307 #endif
3308                                  th->syn) {
3309                         sk->err = ECONNRESET;
3310                         sk->state = TCP_CLOSE;
3311                         sk->shutdown = SHUTDOWN_MASK;
3312                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3313                         if (!sk->dead) {
3314                                 sk->state_change(sk);
3315                         }
3316                         kfree_skb(skb, FREE_READ);
3317                         release_sock(sk);
3318                         return(0);
3319                 }
3320                 if (th->ack) {
3321                         if (!tcp_ack(sk, th, saddr, len)) {
3322                                 kfree_skb(skb, FREE_READ);
3323                                 release_sock(sk);
3324                                 return(0);
3325                         }
3326                 }
3327 
3328                 if (tcp_urg(sk, th, saddr, len)) {
3329                         kfree_skb(skb, FREE_READ);
3330                         release_sock(sk);
3331                         return(0);
3332                 }
3333 
3334                 if (tcp_data(skb, sk, saddr, len)) {
3335                         kfree_skb(skb, FREE_READ);
3336                         release_sock(sk);
3337                         return(0);
3338                 }
3339 
3340                 /* Moved: the data must be processed before the fin bit */
3341                 if (th->fin && tcp_fin(sk, th, saddr, dev)) {
3342                         kfree_skb(skb, FREE_READ);
3343                         release_sock(sk);
3344                         return(0);
3345                 }
3346 
3347                 release_sock(sk);
3348                 return(0);
3349 
3350         case TCP_CLOSE:
3351                 if (sk->dead || sk->daddr) {
3352                         DPRINTF((DBG_TCP, "packet received for closed,dead socket\n"));
3353                         kfree_skb(skb, FREE_READ);
3354                         release_sock(sk);
3355                         return(0);
3356                 }
3357 
3358                 if (!th->rst) {
3359                         if (!th->ack)
3360                                 th->ack_seq = 0;
3361                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3362                 }
3363                 kfree_skb(skb, FREE_READ);
3364                 release_sock(sk);
3365                 return(0);
3366 
3367         case TCP_LISTEN:
3368                 if (th->rst) {
3369                         kfree_skb(skb, FREE_READ);
3370                         release_sock(sk);
3371                         return(0);
3372                 }
3373                 if (th->ack) {
3374                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3375                         kfree_skb(skb, FREE_READ);
3376                         release_sock(sk);
3377                         return(0);
3378                 }
3379 
3380                 if (th->syn) {
3381 #if 0
3382                         if (opt->security != 0 || opt->compartment != 0) {
3383                                 tcp_reset(daddr, saddr, th, prot, opt,dev);
3384                                 release_sock(sk);
3385                                 return(0);
3386                         }
3387 #endif
3388 
3389                         /*
3390                          * Now we just put the whole thing including
3391                          * the header and saddr, and protocol pointer
3392                          * into the buffer.  We can't respond until the
3393                          * user tells us to accept the connection.
3394                          */
3395                         tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
3396                         release_sock(sk);
3397                         return(0);
3398                 }
3399 
3400                 kfree_skb(skb, FREE_READ);
3401                 release_sock(sk);
3402                 return(0);
3403 
3404         default:
3405                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3406                         kfree_skb(skb, FREE_READ);
3407                         release_sock(sk);
3408                         return(0);
3409                 }
3410 
3411         case TCP_SYN_SENT:
3412                 if (th->rst) {
3413                         sk->err = ECONNREFUSED;
3414                         sk->state = TCP_CLOSE;
3415                         sk->shutdown = SHUTDOWN_MASK;
3416                         sk->zapped = 1;
3417                         if (!sk->dead) {
3418                                 sk->state_change(sk);
3419                         }
3420                         kfree_skb(skb, FREE_READ);
3421                         release_sock(sk);
3422                         return(0);
3423                 }
3424 #if 0
3425                 if (opt->security != 0 || opt->compartment != 0) {
3426                         sk->err = ECONNRESET;
3427                         sk->state = TCP_CLOSE;
3428                         sk->shutdown = SHUTDOWN_MASK;
3429                         tcp_reset(daddr, saddr,  th, sk->prot, opt, dev);
3430                         if (!sk->dead) {
3431                                 wake_up_interruptible(sk->sleep);
3432                         }
3433                         kfree_skb(skb, FREE_READ);
3434                         release_sock(sk);
3435                         return(0);
3436                 }
3437 #endif
3438                 if (!th->ack) {
3439                         if (th->syn) {
3440                                 sk->state = TCP_SYN_RECV;
3441                         }
3442 
3443                         kfree_skb(skb, FREE_READ);
3444                         release_sock(sk);
3445                         return(0);
3446                 }
3447 
3448                 switch(sk->state) {
3449                         case TCP_SYN_SENT:
3450                                 if (!tcp_ack(sk, th, saddr, len)) {
3451                                         tcp_reset(daddr, saddr, th,
3452                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3453                                         kfree_skb(skb, FREE_READ);
3454                                         release_sock(sk);
3455                                         return(0);
3456                                 }
3457 
3458                                 /*
3459                                  * If the syn bit is also set, switch to
3460                                  * tcp_syn_recv, and then to established.
3461                                  */
3462                                 if (!th->syn) {
3463                                         kfree_skb(skb, FREE_READ);
3464                                         release_sock(sk);
3465                                         return(0);
3466                                 }
3467 
3468                                 /* Ack the syn and fall through. */
3469                                 sk->acked_seq = th->seq+1;
3470                                 sk->fin_seq = th->seq;
3471                                 tcp_send_ack(sk->send_seq, th->seq+1,
3472                                                         sk, th, sk->daddr);
3473         
3474                         case TCP_SYN_RECV:
3475                                 if (!tcp_ack(sk, th, saddr, len)) {
3476                                         tcp_reset(daddr, saddr, th,
3477                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
3478                                         kfree_skb(skb, FREE_READ);
3479                                         release_sock(sk);
3480                                         return(0);
3481                                 }
3482                                 sk->state = TCP_ESTABLISHED;
3483 
3484                                 /*
3485                                  * Now we need to finish filling out
3486                                  * some of the tcp header.
3487                                  */
3488                                 /* We need to check for mtu info. */
3489                                 tcp_options(sk, th);
3490                                 sk->dummy_th.dest = th->source;
3491                                 sk->copied_seq = sk->acked_seq-1;
3492                                 if (!sk->dead) {
3493                                         sk->state_change(sk);
3494                                 }
3495 
3496                                 /*
3497                                  * We've already processed his first
3498                                  * ack.  In just about all cases that
3499                                  * will have set max_window.  This is
3500                                  * to protect us against the possibility
3501                                  * that the initial window he sent was 0.
3502                                  * This must occur after tcp_options, which
3503                                  * sets sk->mtu.
3504                                  */
3505                                 if (sk->max_window == 0) {
3506                                   sk->max_window = 32;
3507                                   sk->mss = min(sk->max_window, sk->mtu);
3508                                 }
3509 
3510                                 /*
3511                                  * Now process the rest like we were
3512                                  * already in the established state.
3513                                  */
3514                                 if (th->urg) {
3515                                         if (tcp_urg(sk, th, saddr, len)) { 
3516                                                 kfree_skb(skb, FREE_READ);
3517                                                 release_sock(sk);
3518                                                 return(0);
3519                                         }
3520                                 }
3521                                 if (tcp_data(skb, sk, saddr, len))
3522                                         kfree_skb(skb, FREE_READ);
3523 
3524                                 if (th->fin) tcp_fin(sk, th, saddr, dev);
3525                                 release_sock(sk);
3526                                 return(0);
3527                 }
3528 
3529                 if (th->urg) {
3530                         if (tcp_urg(sk, th, saddr, len)) {
3531                                 kfree_skb(skb, FREE_READ);
3532                                 release_sock(sk);
3533                                 return(0);
3534                         }
3535                 }
3536 
3537                 if (tcp_data(skb, sk, saddr, len)) {
3538                         kfree_skb(skb, FREE_READ);
3539                         release_sock(sk);
3540                         return(0);
3541                 }
3542 
3543                 if (!th->fin) {
3544                         release_sock(sk);
3545                         return(0);
3546                 }
3547                 tcp_fin(sk, th, saddr, dev);
3548                 release_sock(sk);
3549                 return(0);
3550         }
3551 }
3552 
3553 
3554 /*
3555  * This routine sends a packet with an out of date sequence
3556  * number. It assumes the other end will try to ack it.
3557  */
3558 static void
3559 tcp_write_wakeup(struct sock *sk)
3560 {
3561   struct sk_buff *buff;
3562   struct tcphdr *t1;
3563   struct device *dev=NULL;
3564   int tmp;
3565 
3566   if (sk->zapped)
3567         return; /* After a valid reset we can send no more */
3568 
3569   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) return;
3570 
3571   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
3572   if (buff == NULL) return;
3573 
3574   buff->mem_addr = buff;
3575   buff->mem_len = MAX_ACK_SIZE;
3576   buff->len = sizeof(struct tcphdr);
3577   buff->free = 1;
3578   buff->sk = sk;
3579   DPRINTF((DBG_TCP, "in tcp_write_wakeup\n"));
3580   t1 = (struct tcphdr *) buff->data;
3581 
3582   /* Put in the IP header and routing stuff. */
3583   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3584                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
3585   if (tmp < 0) {
3586         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3587         return;
3588   }
3589 
3590   buff->len += tmp;
3591   t1 = (struct tcphdr *)((char *)t1 +tmp);
3592 
3593   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
3594 
3595   /*
3596    * Use a previous sequence.
3597    * This should cause the other end to send an ack.
3598    */
3599   t1->seq = ntohl(sk->send_seq-1);
3600   t1->ack = 1; 
3601   t1->res1= 0;
3602   t1->res2= 0;
3603   t1->rst = 0;
3604   t1->urg = 0;
3605   t1->psh = 0;
3606   t1->fin = 0;
3607   t1->syn = 0;
3608   t1->ack_seq = ntohl(sk->acked_seq);
3609   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3610   t1->doff = sizeof(*t1)/4;
3611   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
3612 
3613   /* Send it and free it.
3614    * This will prevent the timer from automatically being restarted.
3615    */
3616   sk->prot->queue_xmit(sk, dev, buff, 1);
3617 }
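
     /*
      * Why the stale sequence number works: a peer that has already acked
      * everything up to sk->send_seq sees the empty segment built above as
      * out of sequence, and its own equivalent of tcp_sequence() replies
      * with an ack carrying its current ack and window values.  That reply
      * is what the callers of this routine (the keepalive style probing in
      * the timer code) rely on to learn that the peer is still there and
      * whether its window has opened.
      */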
3618 
3619 /*
3620  * This routine probes a zero window.  It makes a copy of the first
3621  * packet in the write queue, but with just one byte of data.
3622  */
3623 void
3624 tcp_send_probe0(struct sock *sk)
3625 {
3626   unsigned char *raw;
3627   struct iphdr *iph;
3628   struct sk_buff *skb2, *skb;
3629   int len, hlen, data;
3630   struct tcphdr *t1;
3631   struct device *dev;
3632 
3633   if (sk->zapped)
3634         return; /* After a valid reset we can send no more */
3635 
3636   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT &&
3637       sk -> state != TCP_FIN_WAIT1 && sk->state != TCP_FIN_WAIT2)
3638         return;
3639 
3640   skb = sk->wfront;
3641   if (skb == NULL)
3642         return;
3643 
3644   dev = skb->dev;
3645   /* This shouldn't be able to happen, but it does, so guard against it. */
3646   if(dev==NULL)
3647     {
3648       printk("tcp_send_probe0: NULL device bug!\n");
3649       return;
3650     }
3651   IS_SKB(skb);
3652 
3653   raw = skb->data;
3654   iph = (struct iphdr *) (raw + dev->hard_header_len);
3655 
3656   hlen = (iph->ihl * sizeof(unsigned long)) + dev->hard_header_len;
3657   data = skb->len - hlen - sizeof(struct tcphdr);
3658   len = hlen + sizeof(struct tcphdr) + (data ? 1 : 0);
3659         
3660   /* Allocate buffer. */
3661   if ((skb2 = alloc_skb(sizeof(struct sk_buff) + len, GFP_ATOMIC)) == NULL) {
3662 /*    printk("alloc failed raw %x th %x hlen %d data %d len %d\n",
3663            raw, skb->h.th, hlen, data, len); */
3664     reset_timer (sk, TIME_PROBE0, 10);  /* try again real soon */
3665     return;
3666   }
3667 
3668   skb2->arp = skb->arp;
3669   skb2->len = len;
3670   skb2->h.raw = (char *)(skb2->data);
3671  
3672   sk->wmem_alloc += skb2->mem_len;
3673  
3674   /* Copy the packet header into the new buffer. */
3675   memcpy(skb2->h.raw, raw, len);
3676  
3677   skb2->h.raw += hlen;  /* it's now h.th -- pointer to the tcp header */
3678   t1 = skb2->h.th;
3679  
3680 /* source, dest, seq, from existing packet */
3681   t1->ack_seq = ntohl(sk->acked_seq);
3682   t1->res1 = 0;
3683 /* doff, fin, from existing packet.  Fin is safe because Linux always
3684  * sends fin in a separate packet;
3685  * syn and rst had better be zero in the original. */
3686   t1->ack = 1;
3687   t1->urg = 0;  /* urgent pointer might be beyond this fragment */
3688   t1->res2 = 0;
3689   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3690   t1->urg_ptr = 0;
3691   tcp_send_check(t1, sk->saddr, sk->daddr, len - hlen, sk);
3692   /* Send it and free it.
3693    * This will prevent the timer from automatically being restarted.
3694    */
3695   sk->prot->queue_xmit(sk, dev, skb2, 1);
3696   sk->backoff++;
3697   /*
3698    * In the case of retransmissions there's good reason to limit
3699    * rto to 120 sec, as that's the maximum legal RTT on the Internet.
3700    * For probes it could reasonably be longer.  However, making it
3701    * much longer could cause unacceptable delays in some situations,
3702    * so we might as well use the same value.
3703    */
3704   sk->rto = min(sk->rto << 1, 120*HZ);
3705   reset_timer (sk, TIME_PROBE0, sk->rto);
3706   sk->retransmits++;
3707   sk->prot->retransmits ++;
3708 }
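
     /*
      * Backoff arithmetic, for illustration: assuming HZ == 100 and a
      * starting rto of 1*HZ, successive unanswered probes go out after
      * roughly 2, 4, 8, 16, 32, 64 and then 120 seconds, after which the
      * min() above keeps the interval clamped at 120*HZ jiffies.
      */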
3709 
3710 /*
3711  *      Socket option code for TCP. 
3712  */
3713   
3714 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
3715 {
3716         int val,err;
3717 
3718         if(level!=SOL_TCP)
3719                 return ip_setsockopt(sk,level,optname,optval,optlen);
3720 
3721         if (optval == NULL) 
3722                 return(-EINVAL);
3723 
3724         err=verify_area(VERIFY_READ, optval, sizeof(int));
3725         if(err)
3726                 return err;
3727         
3728         val = get_fs_long((unsigned long *)optval);
3729 
3730         switch(optname)
3731         {
3732                 case TCP_MAXSEG:
3733 /*                      if(val<200||val>2048 || val>sk->mtu) */
3734 /*
3735  * Values greater than the interface MTU won't take effect.  However, at
3736  * the point when this call is made we typically don't yet know
3737  * which interface is going to be used.
3738  */
3739                         if(val<1||val>MAX_WINDOW)
3740                                 return -EINVAL;
3741                         sk->user_mss=val;
3742                         return 0;
3743                 case TCP_NODELAY:
3744                         sk->nonagle=(val==0)?0:1;
3745                         return 0;
3746                 default:
3747                         return(-ENOPROTOOPT);
3748         }
3749 }
3750 
3751 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
3752 {
3753         int val,err;
3754 
3755         if(level!=SOL_TCP)
3756                 return ip_getsockopt(sk,level,optname,optval,optlen);
3757                         
3758         switch(optname)
3759         {
3760                 case TCP_MAXSEG:
3761                         val=sk->user_mss;
3762                         break;
3763                 case TCP_NODELAY:
3764                         val=sk->nonagle;        /* Until Johannes stuff is in */
3765                         break;
3766                 default:
3767                         return(-ENOPROTOOPT);
3768         }
3769         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3770         if(err)
3771                 return err;
3772         put_fs_long(sizeof(int),(unsigned long *) optlen);
3773 
3774         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3775         if(err)
3776                 return err;
3777         put_fs_long(val,(unsigned long *)optval);
3778 
3779         return(0);
3780 }       
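
     /*
      * A minimal user level sketch of the two options handled above
      * (illustrative only, kept out of the build with #if 0): TCP_NODELAY
      * sets sk->nonagle, and TCP_MAXSEG stores the value in sk->user_mss,
      * which tcp_connect() later prefers over the default MSS.
      */
     #if 0
     #include <sys/socket.h>
     #include <netinet/in.h>
     #include <netinet/tcp.h>

     static void tune_tcp_socket(int fd)
     {
             int one = 1;
             int mss = 536;
             int val, len = sizeof(val);

             /* Disable Nagle-style coalescing of small writes. */
             setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one));

             /* Ask for a smaller MSS; values below 1 or above MAX_WINDOW are rejected. */
             setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, (char *) &mss, sizeof(mss));

             /* Read the stored value back through tcp_getsockopt(). */
             getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, (char *) &val, &len);
     }
     #endif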
3781 
3782 
3783 struct proto tcp_prot = {
3784   sock_wmalloc,
3785   sock_rmalloc,
3786   sock_wfree,
3787   sock_rfree,
3788   sock_rspace,
3789   sock_wspace,
3790   tcp_close,
3791   tcp_read,
3792   tcp_write,
3793   tcp_sendto,
3794   tcp_recvfrom,
3795   ip_build_header,
3796   tcp_connect,
3797   tcp_accept,
3798   ip_queue_xmit,
3799   tcp_retransmit,
3800   tcp_write_wakeup,
3801   tcp_read_wakeup,
3802   tcp_rcv,
3803   tcp_select,
3804   tcp_ioctl,
3805   NULL,
3806   tcp_shutdown,
3807   tcp_setsockopt,
3808   tcp_getsockopt,
3809   128,
3810   0,
3811   {NULL,},
3812   "TCP"
3813 };
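
     /*
      * The initializer above is positional: each entry must stay in step
      * with the member order of struct proto (wmalloc, rmalloc, wfree,
      * rfree, rspace, wspace, close, read, write, sendto, recvfrom and so
      * on), with the trailing 128, 0, {NULL,} and "TCP" filling the
      * remaining per-protocol fields such as the header space reserve and
      * the protocol name.
      */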
