root/net/inet/tcp.c


DEFINITIONS

This source file includes the following definitions.
  1. min
  2. print_th
  3. get_firstr
  4. diff
  5. tcp_select_window
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. tcp_send_skb
  15. tcp_dequeue_partial
  16. tcp_send_partial
  17. tcp_enqueue_partial
  18. tcp_send_ack
  19. tcp_build_header
  20. tcp_write
  21. tcp_sendto
  22. tcp_read_wakeup
  23. cleanup_rbuf
  24. tcp_read_urg
  25. tcp_read
  26. tcp_shutdown
  27. tcp_recvfrom
  28. tcp_reset
  29. tcp_options
  30. default_mask
  31. tcp_conn_request
  32. tcp_close
  33. tcp_write_xmit
  34. sort_send
  35. tcp_ack
  36. tcp_data
  37. tcp_urg
  38. tcp_fin
  39. tcp_accept
  40. tcp_connect
  41. tcp_sequence
  42. tcp_rcv
  43. tcp_write_wakeup
  44. tcp_send_probe0
  45. tcp_setsockopt
  46. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *
  16  * Fixes:       
  17  *              Alan Cox        :       Numerous verify_area() calls
  18  *              Alan Cox        :       Set the ACK bit on a reset
  19  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  20  *                                      and was trying to connect (tcp_err()).
  21  *              Alan Cox        :       All icmp error handling was broken
  22  *                                      pointers passed where wrong and the
  23  *                                      socket was looked up backwards. Nobody
  24  *                                      tested any icmp error code obviously.
  25  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  26  *                                      on errors. select behaves and the icmp error race
  27  *                                      has gone by moving it into sock.c
  28  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  29  *                                      packets for unknown sockets.
  30  *              Alan Cox        :       tcp option processing.
  31  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  32  *              Herp Rosmanith  :       More reset fixes
  33  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  34  *                                      any kind of RST is right out.
  35  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  36  *                                      otherwise odd bits of prattle escape still
  37  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  38  *                                      LAN workplace lockups.
  39  *              Alan Cox        :       Some tidyups using the new skb list facilities
  40  *              Alan Cox        :       sk->keepopen now seems to work
  41  *              Alan Cox        :       Pulls options out correctly on accepts
  42  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  43  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  44  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  45  *              Alan Cox        :       Added some better commenting, as the TCP code is hard to follow
  46  *              Alan Cox        :       Removed incorrect check for 20 * psh
  47  *      Michael O'Reilly        :       ack < copied bug fix.
  48  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  49  *              Alan Cox        :       FIN with no memory -> CRASH
  50  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  51  *              Alan Cox        :       Added TCP options (SOL_TCP)
  52  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  53  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  54  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  55  *              Alan Cox        :       RST frames sent on unsynchronised state ack error.
  56  *              Alan Cox        :       Put in missing check for SYN bit.
  57  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  58  *                                      window non shrink trick.
  59  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  60  *              Charles Hedrick :       TCP fixes
  61  *              Toomas Tamm     :       TCP window fixes
  62  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  63  *
  64  *
  65  * To Fix:
  66  *                      Possibly a problem with accept(). BSD accept never fails after
  67  *              it causes a select. Linux can - given the official select semantics I
  68  *              feel that _really_ its the BSD network programs that are bust (notably
  69  *              inetd, which hangs occasionally because of this).
  70  *                      Add VJ Fastrecovery algorithm ?
  71  *                      Protocol closedown badly messed up.
  72  *                      Incompatibility with spider ports (tcp hangs on that 
  73  *                      socket occasionally).
  74  *              MSG_PEEK and read on same socket at once can cause crashes.
  75  *
  76  *              This program is free software; you can redistribute it and/or
  77  *              modify it under the terms of the GNU General Public License
  78  *              as published by the Free Software Foundation; either version
  79  *              2 of the License, or (at your option) any later version.
  80  */
  81 #include <linux/types.h>
  82 #include <linux/sched.h>
  83 #include <linux/mm.h>
  84 #include <linux/string.h>
  85 #include <linux/socket.h>
  86 #include <linux/sockios.h>
  87 #include <linux/termios.h>
  88 #include <linux/in.h>
  89 #include <linux/fcntl.h>
  90 #include "inet.h"
  91 #include "dev.h"
  92 #include "ip.h"
  93 #include "protocol.h"
  94 #include "icmp.h"
  95 #include "tcp.h"
  96 #include "skbuff.h"
  97 #include "sock.h"
  98 #include "arp.h"
  99 #include <linux/errno.h>
 100 #include <linux/timer.h>
 101 #include <asm/system.h>
 102 #include <asm/segment.h>
 103 #include <linux/mm.h>
 104 
 105 #define SEQ_TICK 3
 106 unsigned long seq_offset;
 107 #define SUBNETSARELOCAL
 108 
 109 static __inline__ int 
 110 min(unsigned int a, unsigned int b)
 111 {
 112   if (a < b) return(a);
 113   return(b);
 114 }
 115 
 116 
 117 void
 118 print_th(struct tcphdr *th)
 119 {
 120   unsigned char *ptr;
 121 
 122   if (inet_debug != DBG_TCP) return;
 123 
 124   printk("TCP header:\n");
 125   ptr =(unsigned char *)(th + 1);
 126   printk("    source=%d, dest=%d, seq =%ld, ack_seq = %ld\n",
 127         ntohs(th->source), ntohs(th->dest),
 128         ntohl(th->seq), ntohl(th->ack_seq));
 129   printk("    fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n",
 130         th->fin, th->syn, th->rst, th->psh, th->ack,
 131         th->urg, th->res1, th->res2);
 132   printk("    window = %d, check = %d urg_ptr = %d\n",
 133         ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr));
 134   printk("    doff = %d\n", th->doff);
 135   printk("    options = %d %d %d %d\n", ptr[0], ptr[1], ptr[2], ptr[3]);
 136  }
 137 
 138 
 139 
 140 /* This routine grabs the first thing off of a rcv queue. */
 141 static struct sk_buff *
 142 get_firstr(struct sock *sk)
 143 {
 144   return skb_dequeue(&sk->rqueue);
 145 }
 146 
 147 /*
 148  *      Difference between two values in tcp ack terms.
 149  */
 150 
 151 static long
 152 diff(unsigned long seq1, unsigned long seq2)
 153 {
 154   long d;
 155 
 156   d = seq1 - seq2;
 157   if (d > 0) return(d);
 158 
 159   /* I hope this returns what I want. */
 160   return(~d+1);
 161 }
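/*
 * A rough worked example of the arithmetic above (values invented for
 * illustration): with 32 bit sequence numbers, diff(5, 0xfffffffb)
 * computes d = 5 - 0xfffffffb = 10 modulo 2^32, so the two values are
 * treated as 10 apart even though seq2 has wrapped.  When d comes out
 * negative, ~d+1 is just the two's complement negation, i.e. -d.
 */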
 162 
 163 /* This routine picks a TCP window for a socket based on
 164    the following constraints
 165    
 166    1. The window can never be shrunk once it is offered (RFC 793)
 167    2. We limit memory per socket
 168    
 169    For now we use NET2E3's heuristic of offering half the memory
 170    we have handy. All is not as bad as this seems however because
 171    of two things. Firstly we will bin packets even within the window
 172    in order to get the data we are waiting for into the memory limit.
 173    Secondly we bin common duplicate forms at receive time
 174 
 175    Better heuristics welcome
 176 */
 177    
 178 static int tcp_select_window(struct sock *sk)
 179 {
 180         int new_window = sk->prot->rspace(sk);
 181 
 182 /*
 183  * two things are going on here.  First, we don't ever offer a
 184  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 185  * receiver side of SWS as specified in RFC1122.
 186  * Second, we always give them at least the window they
 187  * had before, in order to avoid retracting window.  This
 188  * is technically allowed, but RFC1122 advises against it and
 189  * in practice it causes trouble.
 190  */
 191         if (new_window < min(sk->mss, MAX_WINDOW/2) ||
 192             new_window < sk->window)
 193           return(sk->window);
 194         return(new_window);
 195 }
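/*
 * For example (figures invented, not from a real trace): if rspace()
 * currently reports 2000 bytes free, sk->mss is 1400 and the window
 * previously offered was 4000, the test above keeps answering 4000
 * rather than retracting the offer; the advertised window only grows
 * again once the free receive space exceeds the old window.
 */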
 196 
 197 /* Enter the time wait state. */
 198 
 199 static void tcp_time_wait(struct sock *sk)
 200 {
 201   sk->state = TCP_TIME_WAIT;
 202   sk->shutdown = SHUTDOWN_MASK;
 203   if (!sk->dead)
 204         sk->state_change(sk);
 205   reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 206 }
 207 
 208 /*
 209  *      A timer event has triggered a tcp retransmit timeout. The
 210  *      socket xmit queue is ready and set up to send. Because
 211  *      the ack receive code keeps the queue straight we do
 212  *      nothing clever here.
 213  */
 214 
 215 static void
 216 tcp_retransmit(struct sock *sk, int all)
 217 {
 218   if (all) {
 219         ip_retransmit(sk, all);
 220         return;
 221   }
 222 
 223   sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 224   /* sk->ssthresh in theory can be zero.  I guess that's OK */
 225   sk->cong_count = 0;
 226 
 227   sk->cong_window = 1;
 228 
 229   /* Do the actual retransmit. */
 230   ip_retransmit(sk, all);
 231 }
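/*
 * Roughly, the effect of the above in slow start terms: a sender that
 * had opened cong_window to, say, 8 segments remembers half of that
 * (ssthresh = 4) and falls back to a congestion window of a single
 * segment, rebuilding the window from scratch after the loss.  The
 * figures are only an illustration of the halving, not measured data.
 */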
 232 
 233 
 234 /*
 235  * This routine is called by the ICMP module when it gets some
 236  * sort of error condition.  If err < 0 then the socket should
 237  * be closed and the error returned to the user.  If err > 0
 238  * it's just the icmp type << 8 | icmp code.  After adjustment
 239  * header points to the first 8 bytes of the tcp header.  We need
 240  * to find the appropriate port.
 241  */
 242 void
 243 tcp_err(int err, unsigned char *header, unsigned long daddr,
 244         unsigned long saddr, struct inet_protocol *protocol)
 245 {
 246   struct tcphdr *th;
 247   struct sock *sk;
 248   struct iphdr *iph=(struct iphdr *)header;
 249   
 250   header+=4*iph->ihl;
 251    
 252   DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n",
 253                                         err, header, daddr, saddr, protocol));
 254 
 255   th =(struct tcphdr *)header;
 256   sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 257   print_th(th);
 258 
 259   if (sk == NULL) return;
 260   
 261   if(err<0)
 262   {
 263         sk->err = -err;
 264         sk->error_report(sk);
 265         return;
 266   }
 267 
 268   if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
 269         /*
 270          * FIXME:
 271          * For now we will just trigger a linear backoff.
 272          * The slow start code should cause a real backoff here.
 273          */
 274         if (sk->cong_window > 4) sk->cong_window--;
 275         return;
 276   }
 277 
 278   DPRINTF((DBG_TCP, "TCP: icmp_err got error\n"));
 279   sk->err = icmp_err_convert[err & 0xff].errno;
 280 
 281   /*
 282    * If we've already connected we will keep trying
 283    * until we time out, or the user gives up.
 284    */
 285   if (icmp_err_convert[err & 0xff].fatal) {
 286         if (sk->state == TCP_SYN_SENT) {
 287                 sk->state = TCP_CLOSE;
 288                 sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 289         }
 290   }
 291   return;
 292 }
 293 
 294 
 295 /*
 296  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 297  *      in the received data queue (i.e. a missing frame that still needs to be sent to us)
 298  */
 299 
 300 static int
 301 tcp_readable(struct sock *sk)
 302 {
 303   unsigned long counted;
 304   unsigned long amount;
 305   struct sk_buff *skb;
 306   int count=0;
 307   int sum;
 308   unsigned long flags;
 309 
 310   DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk));
 311   if(sk && sk->debug)
 312         printk("tcp_readable: %p - ",sk);
 313 
 314   if (sk == NULL || skb_peek(&sk->rqueue) == NULL)      /* Empty sockets are easy! */
 315   {
 316         if(sk && sk->debug) 
 317                 printk("empty\n");
 318         return(0);
 319   }
 320   
 321   counted = sk->copied_seq+1;   /* Where we are at the moment */
 322   amount = 0;
 323   
 324   save_flags(flags);            /* So nobody adds things at the wrong moment */
 325   cli();
 326   skb =(struct sk_buff *)sk->rqueue;
 327 
 328   /* Do until a push or until we are out of data. */
 329   do {
 330         count++;
 331 #ifdef OLD      
 332         /* This is wrong: It breaks Chameleon amongst other stacks */
 333         if (count > 20) {
 334                 restore_flags(flags);
 335                 DPRINTF((DBG_TCP, "tcp_readable, more than 20 packets without a psh\n"));
 336                 printk("tcp_read: possible read_queue corruption.\n");
 337                 return(amount);
 338         }
 339 #endif  
 340         if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 341                 break;
 342         sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 343         if (skb->h.th->syn) sum++;
 344         if (skb->h.th->urg) {
 345                 sum -= ntohs(skb->h.th->urg_ptr);       /* Dont count urg data */
 346         }
 347         if (sum >= 0) {                                 /* Add it up, move on */
 348                 amount += sum;
 349                 if (skb->h.th->syn) amount--;
 350                 counted += sum;
 351         }
 352         if (amount && skb->h.th->psh) break;
 353         skb =(struct sk_buff *)skb->next;               /* Move along */
 354   } while(skb != sk->rqueue);
 355   restore_flags(flags);
 356   DPRINTF((DBG_TCP, "tcp readable returning %d bytes\n", amount));
 357   if(sk->debug)
 358         printk("got %lu bytes.\n",amount);
 359   return(amount);
 360 }
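/*
 * By way of example (sequence numbers invented): if copied_seq+1 is
 * 1000 and the queue holds segments covering 1000-1499 and 1700-1999,
 * the loop above counts the first 500 bytes and then stops at the
 * before() test because 1500 < 1700; the 300 bytes beyond the hole are
 * not reported as readable until the missing segment arrives.
 */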
 361 
 362 
 363 /*
 364  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 365  *      listening socket has a receive queue of sockets to accept.
 366  */
 367 
 368 static int
 369 tcp_select(struct sock *sk, int sel_type, select_table *wait)
 370 {
 371   DPRINTF((DBG_TCP, "tcp_select(sk=%X, sel_type = %d, wait = %X)\n",
 372                                                 sk, sel_type, wait));
 373 
 374   sk->inuse = 1;
 375   switch(sel_type) {
 376         case SEL_IN:
 377                 if(sk->debug)
 378                         printk("select in");
 379                 select_wait(sk->sleep, wait);
 380                 if(sk->debug)
 381                         printk("-select out");
 382                 if (skb_peek(&sk->rqueue) != NULL) {
 383                         if (sk->state == TCP_LISTEN || tcp_readable(sk)) {
 384                                 release_sock(sk);
 385                                 if(sk->debug)
 386                                         printk("-select ok data\n");
 387                                 return(1);
 388                         }
 389                 }
 390                 if (sk->err != 0)       /* Receiver error */
 391                 {
 392                         release_sock(sk);
 393                         if(sk->debug)
 394                                 printk("-select ok error");
 395                         return(1);
 396                 }
 397                 if (sk->shutdown & RCV_SHUTDOWN) {
 398                         release_sock(sk);
 399                         if(sk->debug)
 400                                 printk("-select ok down\n");
 401                         return(1);
 402                 } else {
 403                         release_sock(sk);
 404                         if(sk->debug)
 405                                 printk("-select fail\n");
 406                         return(0);
 407                 }
 408         case SEL_OUT:
 409                 select_wait(sk->sleep, wait);
 410                 if (sk->shutdown & SEND_SHUTDOWN) {
 411                         DPRINTF((DBG_TCP,
 412                                 "write select on shutdown socket.\n"));
 413 
 414                         /* FIXME: should this return an error? */
 415                         release_sock(sk);
 416                         return(0);
 417                 }
 418 
 419                 /*
 420                  * FIXME:
 421                  * Hack so it will probably be able to write
 422                  * something if it says it's ok to write.
 423                  */
 424                 if (sk->prot->wspace(sk) >= sk->mss) {
 425                         release_sock(sk);
 426                         /* This should cause connect to work ok. */
 427                         if (sk->state == TCP_SYN_RECV ||
 428                             sk->state == TCP_SYN_SENT) return(0);
 429                         return(1);
 430                 }
 431                 DPRINTF((DBG_TCP,
 432                         "tcp_select: sleeping on write sk->wmem_alloc = %d, "
 433                         "sk->packets_out = %d\n"
 434                         "sk->wback = %X, sk->wfront = %X\n"
 435                         "sk->send_seq = %u, sk->window_seq=%u\n", 
 436                                 sk->wmem_alloc, sk->packets_out,
 437                                 sk->wback, sk->wfront,
 438                                 sk->send_seq, sk->window_seq));
 439 
 440                 release_sock(sk);
 441                 return(0);
 442         case SEL_EX:
 443                 select_wait(sk->sleep,wait);
 444                 if (sk->err) {
 445                         release_sock(sk);
 446                         return(1);
 447                 }
 448                 release_sock(sk);
 449                 return(0);
 450   }
 451 
 452   release_sock(sk);
 453   return(0);
 454 }
 455 
 456 
 457 int
 458 tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 459 {
 460   int err;
 461   DPRINTF((DBG_TCP, "tcp_ioctl(sk=%X, cmd = %d, arg=%X)\n", sk, cmd, arg));
 462   switch(cmd) {
 463         case DDIOCSDBG:
 464                 return(dbg_ioctl((void *) arg, DBG_TCP));
 465 
 466         case TIOCINQ:
 467 #ifdef FIXME    /* FIXME: */
 468         case FIONREAD:
 469 #endif
 470                 {
 471                         unsigned long amount;
 472 
 473                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 474 
 475                         sk->inuse = 1;
 476                         amount = tcp_readable(sk);
 477                         release_sock(sk);
 478                         DPRINTF((DBG_TCP, "returning %d\n", amount));
 479                         err=verify_area(VERIFY_WRITE,(void *)arg,
 480                                                    sizeof(unsigned long));
 481                         if(err)
 482                                 return err;
 483                         put_fs_long(amount,(unsigned long *)arg);
 484                         return(0);
 485                 }
 486         case SIOCATMARK:
 487                 {
 488                         struct sk_buff *skb;
 489                         int answ = 0;
 490 
 491                         /*
 492                          * Try to figure out if we need to read
 493                          * some urgent data.
 494                          */
 495                         sk->inuse = 1;
 496                         if ((skb=skb_peek(&sk->rqueue)) != NULL) 
 497                         {
 498                                 if (sk->copied_seq+1 == skb->h.th->seq && skb->h.th->urg) 
 499                                                 answ = 1;
 500                         }
 501                         release_sock(sk);
 502                         err=verify_area(VERIFY_WRITE,(void *) arg,
 503                                                   sizeof(unsigned long));
 504                         if(err)
 505                                 return err;
 506                         put_fs_long(answ,(int *) arg);
 507                         return(0);
 508                 }
 509         case TIOCOUTQ:
 510                 {
 511                         unsigned long amount;
 512 
 513                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 514                         amount = sk->prot->wspace(sk);
 515                         err=verify_area(VERIFY_WRITE,(void *)arg,
 516                                                    sizeof(unsigned long));
 517                         if(err)
 518                                 return err;
 519                         put_fs_long(amount,(unsigned long *)arg);
 520                         return(0);
 521                 }
 522         default:
 523                 return(-EINVAL);
 524   }
 525 }
 526 
 527 
 528 /* This routine computes a TCP checksum. */
 529 unsigned short
 530 tcp_check(struct tcphdr *th, int len,
 531           unsigned long saddr, unsigned long daddr)
 532 {     
 533   unsigned long sum;
 534    
 535   if (saddr == 0) saddr = my_addr();
 536   print_th(th);
 537   __asm__("\t addl %%ecx,%%ebx\n"
 538           "\t adcl %%edx,%%ebx\n"
 539           "\t adcl $0, %%ebx\n"
 540           : "=b"(sum)
 541           : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 542           : "cx","bx","dx" );
 543    
 544   if (len > 3) {
 545         __asm__("\tclc\n"
 546                 "1:\n"
 547                 "\t lodsl\n"
 548                 "\t adcl %%eax, %%ebx\n"
 549                 "\t loop 1b\n"
 550                 "\t adcl $0, %%ebx\n"
 551                 : "=b"(sum) , "=S"(th)
 552                 : "0"(sum), "c"(len/4) ,"1"(th)
 553                 : "ax", "cx", "bx", "si" );
 554   }
 555    
 556   /* Convert from 32 bits to 16 bits. */
 557   __asm__("\t movl %%ebx, %%ecx\n"
 558           "\t shrl $16,%%ecx\n"
 559           "\t addw %%cx, %%bx\n"
 560           "\t adcw $0, %%bx\n"
 561           : "=b"(sum)
 562           : "0"(sum)
 563           : "bx", "cx");
 564    
 565   /* Check for an extra word. */
 566   if ((len & 2) != 0) {
 567         __asm__("\t lodsw\n"
 568                 "\t addw %%ax,%%bx\n"
 569                 "\t adcw $0, %%bx\n"
 570                 : "=b"(sum), "=S"(th)
 571                 : "0"(sum) ,"1"(th)
 572                 : "si", "ax", "bx");
 573   }
 574    
 575   /* Now check for the extra byte. */
 576   if ((len & 1) != 0) {
 577         __asm__("\t lodsb\n"
 578                 "\t movb $0,%%ah\n"
 579                 "\t addw %%ax,%%bx\n"
 580                 "\t adcw $0, %%bx\n"
 581                 : "=b"(sum)
 582                 : "0"(sum) ,"S"(th)
 583                 : "si", "ax", "bx");
 584   }
 585    
 586   /* We only want the bottom 16 bits, but we never cleared the top 16. */
 587   return((~sum) & 0xffff);
 588 }
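/*
 * The assembly above is the usual Internet one's complement sum over a
 * pseudo header (source, destination, protocol, length) followed by
 * the TCP segment, folded down to 16 bits.  A portable C sketch of the
 * same idea - names and details here are illustrative only, not a
 * drop-in replacement for the routine above - might read:
 *
 *	unsigned long sum = 0;
 *	unsigned short *p = (unsigned short *) th;
 *	int n = len;
 *
 *	sum += (saddr & 0xffff) + (saddr >> 16);
 *	sum += (daddr & 0xffff) + (daddr >> 16);
 *	sum += htons(IPPROTO_TCP) + htons(len);
 *	while (n > 1) { sum += *p++; n -= 2; }
 *	if (n) sum += *(unsigned char *) p;
 *	while (sum >> 16)
 *		sum = (sum & 0xffff) + (sum >> 16);
 *	return (unsigned short) ~sum;
 */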
 589 
 590 
 591 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
 592                 unsigned long daddr, int len, struct sock *sk)
 593 {
 594         th->check = 0;
 595         th->check = tcp_check(th, len, saddr, daddr);
 596         return;
 597 }
 598 
 599 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
 600 {
 601         int size;
 602 
 603         /* length of packet (not counting length of pre-tcp headers) */
 604         size = skb->len - ((unsigned char *) skb->h.th - skb->data);
 605 
 606         /* sanity check it.. */
 607         if (size < sizeof(struct tcphdr) || size > skb->len) {
 608                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 609                         skb, skb->data, skb->h.th, skb->len);
 610                 kfree_skb(skb, FREE_WRITE);
 611                 return;
 612         }
 613 
 614         /* If we have queued a header size packet.. */
 615         if (size == sizeof(struct tcphdr)) {
 616                 /* If it's got a syn or fin it's notionally included in the size.. */
 617                 if(!skb->h.th->syn && !skb->h.th->fin) {
 618                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 619                         kfree_skb(skb,FREE_WRITE);
 620                         return;
 621                 }
 622         }
 623   
 624         /* We need to complete and send the packet. */
 625         tcp_send_check(skb->h.th, sk->saddr, sk->daddr, size, sk);
 626 
 627         skb->h.seq = sk->send_seq;
 628         if (after(sk->send_seq , sk->window_seq) ||
 629             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 630              sk->packets_out >= sk->cong_window) {
 631                 DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
 632                                         sk->cong_window, sk->packets_out));
 633                 DPRINTF((DBG_TCP, "sk->send_seq = %d, sk->window_seq = %d\n",
 634                                         sk->send_seq, sk->window_seq));
 635                 skb->next = NULL;
 636                 skb->magic = TCP_WRITE_QUEUE_MAGIC;
 637                 if (sk->wback == NULL) {
 638                         sk->wfront = skb;
 639                 } else {
 640                         sk->wback->next = skb;
 641                 }
 642                 sk->wback = skb;
 643                 if (before(sk->window_seq, sk->wfront->h.seq) &&
 644                     sk->send_head == NULL &&
 645                     sk->ack_backlog == 0)
 646                   reset_timer(sk, TIME_PROBE0, sk->rto);
 647         } else {
 648                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 649         }
 650 }
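/*
 * In short, the frame built above is queued rather than transmitted
 * when any of three things hold: its data would end beyond the peer's
 * offered window, a retransmission is already pending, or a full
 * congestion window of packets is unacknowledged.  If the only
 * obstacle is a closed window and nothing else is in flight, the
 * zero-window probe timer is armed so the connection cannot stall.
 */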
 651 
 652 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
 653 {
 654         struct sk_buff * skb;
 655         unsigned long flags;
 656 
 657         save_flags(flags);
 658         cli();
 659         skb = sk->partial;
 660         if (skb) {
 661                 sk->partial = NULL;
 662                 del_timer(&sk->partial_timer);
 663         }
 664         restore_flags(flags);
 665         return skb;
 666 }
 667 
 668 static void tcp_send_partial(struct sock *sk)
 669 {
 670         struct sk_buff *skb;
 671 
 672         if (sk == NULL)
 673                 return;
 674         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 675                 tcp_send_skb(sk, skb);
 676 }
 677 
 678 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
 679 {
 680         struct sk_buff * tmp;
 681         unsigned long flags;
 682 
 683         save_flags(flags);
 684         cli();
 685         tmp = sk->partial;
 686         if (tmp)
 687                 del_timer(&sk->partial_timer);
 688         sk->partial = skb;
 689         sk->partial_timer.expires = HZ;
 690         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 691         sk->partial_timer.data = (unsigned long) sk;
 692         add_timer(&sk->partial_timer);
 693         restore_flags(flags);
 694         if (tmp)
 695                 tcp_send_skb(sk, tmp);
 696 }
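/*
 * The net effect is that a sub-MSS fragment parks on sk->partial
 * waiting for more data to be appended.  If a second partial frame
 * turns up before the timer fires, the old one is sent at once, so at
 * most one held-back fragment exists per socket and the timer bounds
 * how long it can sit there.
 */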
 697 
 698 
 699 /* This routine sends an ack and also updates the window. */
 700 static void
 701 tcp_send_ack(unsigned long sequence, unsigned long ack,
 702              struct sock *sk,
 703              struct tcphdr *th, unsigned long daddr)
 704 {
 705   struct sk_buff *buff;
 706   struct tcphdr *t1;
 707   struct device *dev = NULL;
 708   int tmp;
 709 
 710   if(sk->zapped)
 711         return;         /* We have been reset, we may not send again */
 712   /*
 713    * We need to grab some memory, and put together an ack,
 714    * and then put it into the queue to be sent.
 715    */
 716   buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 717   if (buff == NULL) {
 718         /* Force it to send an ack. */
 719         sk->ack_backlog++;
 720         if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) {
 721                 reset_timer(sk, TIME_WRITE, 10);
 722         }
 723 if (inet_debug == DBG_SLIP) printk("\rtcp_ack: malloc failed\n");
 724         return;
 725   }
 726 
 727   buff->mem_addr = buff;
 728   buff->mem_len = MAX_ACK_SIZE;
 729   buff->len = sizeof(struct tcphdr);
 730   buff->sk = sk;
 731   t1 =(struct tcphdr *) buff->data;
 732 
 733   /* Put in the IP header and routing stuff. */
 734   tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 735                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 736   if (tmp < 0) {
 737         buff->free=1;
 738         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 739 if (inet_debug == DBG_SLIP) printk("\rtcp_ack: build_header failed\n");
 740         return;
 741   }
 742   buff->len += tmp;
 743   t1 =(struct tcphdr *)((char *)t1 +tmp);
 744 
 745   /* FIXME: */
 746   memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 747 
 748   /* swap the send and the receive. */
 749   t1->dest = th->source;
 750   t1->source = th->dest;
 751   t1->seq = ntohl(sequence);
 752   t1->ack = 1;
 753   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
 754   t1->window = ntohs(sk->window);
 755   t1->res1 = 0;
 756   t1->res2 = 0;
 757   t1->rst = 0;
 758   t1->urg = 0;
 759   t1->syn = 0;
 760   t1->psh = 0;
 761   t1->fin = 0;
 762   if (ack == sk->acked_seq) {
 763         sk->ack_backlog = 0;
 764         sk->bytes_rcv = 0;
 765         sk->ack_timed = 0;
 766         if (sk->send_head == NULL && sk->wfront == NULL && sk->timeout == TIME_WRITE) 
 767         {
 768                 if(sk->keepopen)
 769                         reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 770                 else
 771                         delete_timer(sk);
 772         }
 773   }
 774   t1->ack_seq = ntohl(ack);
 775   t1->doff = sizeof(*t1)/4;
 776   tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 777   if (sk->debug)
 778          printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 779   sk->prot->queue_xmit(sk, dev, buff, 1);
 780 }
 781 
 782 
 783 /* This routine builds a generic TCP header. */
 784 static int
 785 tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
 786 {
 787 
 788   /* FIXME: want to get rid of this. */
 789   memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 790   th->seq = htonl(sk->send_seq);
 791   th->psh =(push == 0) ? 1 : 0;
 792   th->doff = sizeof(*th)/4;
 793   th->ack = 1;
 794   th->fin = 0;
 795   sk->ack_backlog = 0;
 796   sk->bytes_rcv = 0;
 797   sk->ack_timed = 0;
 798   th->ack_seq = htonl(sk->acked_seq);
 799   sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 800   th->window = htons(sk->window);
 801 
 802   return(sizeof(*th));
 803 }
 804 
 805 /*
 806  * This routine copies from a user buffer into a socket,
 807  * and starts the transmit system.
 808  */
 809 static int
 810 tcp_write(struct sock *sk, unsigned char *from,
 811           int len, int nonblock, unsigned flags)
 812 {
 813   int copied = 0;
 814   int copy;
 815   int tmp;
 816   struct sk_buff *skb;
 817   struct sk_buff *send_tmp;
 818   unsigned char *buff;
 819   struct proto *prot;
 820   struct device *dev = NULL;
 821 
 822   DPRINTF((DBG_TCP, "tcp_write(sk=%X, from=%X, len=%d, nonblock=%d, flags=%X)\n",
 823                                         sk, from, len, nonblock, flags));
 824 
 825   sk->inuse=1;
 826   prot = sk->prot;
 827   while(len > 0) {
 828         if (sk->err) {                  /* Stop on an error */
 829                 release_sock(sk);
 830                 if (copied) return(copied);
 831                 tmp = -sk->err;
 832                 sk->err = 0;
 833                 return(tmp);
 834         }
 835 
 836         /* First thing we do is make sure that we are established. */    
 837         if (sk->shutdown & SEND_SHUTDOWN) {
 838                 release_sock(sk);
 839                 sk->err = EPIPE;
 840                 if (copied) return(copied);
 841                 sk->err = 0;
 842                 return(-EPIPE);
 843         }
 844 
 845 
 846         /* Wait for a connection to finish. */
 847         
 848         while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) {
 849                 if (sk->err) {
 850                         release_sock(sk);
 851                         if (copied) return(copied);
 852                         tmp = -sk->err;
 853                         sk->err = 0;
 854                         return(tmp);
 855                 }
 856 
 857                 if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) {
 858                         release_sock(sk);
 859                         DPRINTF((DBG_TCP, "tcp_write: return 1\n"));
 860                         if (copied) return(copied);
 861 
 862                         if (sk->err) {
 863                                 tmp = -sk->err;
 864                                 sk->err = 0;
 865                                 return(tmp);
 866                         }
 867 
 868                         if (sk->keepopen) {
 869                                 send_sig(SIGPIPE, current, 0);
 870                         }
 871                         return(-EPIPE);
 872                 }
 873 
 874                 if (nonblock || copied) {
 875                         release_sock(sk);
 876                         DPRINTF((DBG_TCP, "tcp_write: return 2\n"));
 877                         if (copied) return(copied);
 878                         return(-EAGAIN);
 879                 }
 880 
 881                 release_sock(sk);
 882                 cli();
 883                 if (sk->state != TCP_ESTABLISHED &&
 884                     sk->state != TCP_CLOSE_WAIT && sk->err == 0) {
 885                         interruptible_sleep_on(sk->sleep);
 886                         if (current->signal & ~current->blocked) {
 887                                 sti();
 888                                 DPRINTF((DBG_TCP, "tcp_write: return 3\n"));
 889                                 if (copied) return(copied);
 890                                 return(-ERESTARTSYS);
 891                         }
 892                 }
 893                 sk->inuse = 1;
 894                 sti();
 895         }
 896 
 897 /*
 898  * The following code can result in copy <= 0 if sk->mss is ever
 899  * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 900  * sk->mtu is constant once SYN processing is finished.  I.e. we
 901  * had better not get here until we've seen his SYN and at least one
 902  * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 903  * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 904  * non-decreasing.  Note that any ioctl to set user_mss must be done
 905  * before the exchange of SYN's.  If the initial ack from the other
 906  * end has a window of 0, max_window and thus mss will both be 0.
 907  */
 908 
 909         /* Now we need to check if we have a half built packet. */
 910         if ((skb = tcp_dequeue_partial(sk)) != NULL) {
 911                 int hdrlen;
 912 
 913                  /* IP header + TCP header */
 914                 hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 915                          + sizeof(struct tcphdr);
 916 
 917                 /* Add more stuff to the end of skb->len */
 918                 if (!(flags & MSG_OOB)) {
 919                         copy = min(sk->mss - (skb->len - hdrlen), len);
 920                         /* FIXME: this is really a bug. */
 921                         if (copy <= 0) {
 922                           printk("TCP: **bug**: \"copy\" <= 0!!\n");
 923                           copy = 0;
 924                         }
 925           
 926                         memcpy_fromfs(skb->data + skb->len, from, copy);
 927                         skb->len += copy;
 928                         from += copy;
 929                         copied += copy;
 930                         len -= copy;
 931                         sk->send_seq += copy;
 932                       }
 933                 if ((skb->len - hdrlen) >= sk->mss ||
 934                     (flags & MSG_OOB) ||
 935                     !sk->packets_out)
 936                         tcp_send_skb(sk, skb);
 937                 else
 938                         tcp_enqueue_partial(skb, sk);
 939                 continue;
 940         }
 941 
 942         /*
 943          * We also need to worry about the window.
 944          * If window < 1/2 the maximum window we've seen from this
 945          *   host, don't use it.  This is sender side
 946          *   silly window prevention, as specified in RFC1122.
 947          *   (Note that this is different from earlier versions of
 948          *   SWS prevention, e.g. RFC813.)  What we actually do is 
 949          *   use the whole MSS.  Since this results in the right
 950          *   edge of the packet being outside the window, it will
 951          *   be queued for later rather than sent.
 952          */
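        /*
         * A purely illustrative example of the rule above: if the
         * largest window the peer has ever offered is 8000 bytes but
         * it is currently offering only 1000, copy falls below
         * max_window/2 and is bumped to a full mss; the over-size
         * segment that results then sits on the write queue until the
         * window opens up again.
         */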
 953 
 954         copy = diff(sk->window_seq, sk->send_seq);
 955         /* what if max_window == 1?  In that case max_window >> 1 is 0.
 956          * however in that case copy == max_window, so it's OK to use 
 957          * the window */
 958         if (copy < (sk->max_window >> 1))
 959           copy = sk->mss;
 960         copy = min(copy, sk->mss);
 961         copy = min(copy, len);
 962 
 963   /* We should really check the window here also. */
 964         send_tmp = NULL;
 965         if (copy < sk->mss && !(flags & MSG_OOB)) {
 966         /* We will release the socket in case we sleep here. */
 967           release_sock(sk);
 968           /* NB: following must be mtu, because mss can be increased.
 969            * mss is always <= mtu */
 970           skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
 971           sk->inuse = 1;
 972           send_tmp = skb;
 973         } else {
 974                 /* We will release the socket in case we sleep here. */
 975           release_sock(sk);
 976           skb = prot->wmalloc(sk, copy + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
 977           sk->inuse = 1;
 978         }
 979 
 980         /* If we didn't get any memory, we need to sleep. */
 981         if (skb == NULL) {
 982                 if (nonblock /* || copied */) {
 983                         release_sock(sk);
 984                         DPRINTF((DBG_TCP, "tcp_write: return 4\n"));
 985                         if (copied) return(copied);
 986                         return(-EAGAIN);
 987                 }
 988 
 989                 /* FIXME: here is another race condition. */
 990                 tmp = sk->wmem_alloc;
 991                 release_sock(sk);
 992                 cli();
 993                 /* Again we will try to avoid it. */
 994                 if (tmp <= sk->wmem_alloc &&
 995                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
 996                                 && sk->err == 0) {
 997                         interruptible_sleep_on(sk->sleep);
 998                         if (current->signal & ~current->blocked) {
 999                                 sti();
1000                                 DPRINTF((DBG_TCP, "tcp_write: return 5\n"));
1001                                 if (copied) return(copied);
1002                                 return(-ERESTARTSYS);
1003                         }
1004                 }
1005                 sk->inuse = 1;
1006                 sti();
1007                 continue;
1008         }
1009 
1010         skb->len = 0;
1011         skb->sk = sk;
1012         skb->free = 0;
1013 
1014         buff = skb->data;
1015 
1016         /*
1017          * FIXME: we need to optimize this.
1018          * Perhaps some hints here would be good.
1019          */
1020         tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1021                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1022         if (tmp < 0 ) {
1023                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
1024                 release_sock(sk);
1025                 DPRINTF((DBG_TCP, "tcp_write: return 6\n"));
1026                 if (copied) return(copied);
1027                 return(tmp);
1028         }
1029         skb->len += tmp;
1030         skb->dev = dev;
1031         buff += tmp;
1032         skb->h.th =(struct tcphdr *) buff;
1033         tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1034         if (tmp < 0) {
1035                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
1036                 release_sock(sk);
1037                 DPRINTF((DBG_TCP, "tcp_write: return 7\n"));
1038                 if (copied) return(copied);
1039                 return(tmp);
1040         }
1041 
1042         if (flags & MSG_OOB) {
1043                 ((struct tcphdr *)buff)->urg = 1;
1044                 ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1045         }
1046         skb->len += tmp;
1047         memcpy_fromfs(buff+tmp, from, copy);
1048 
1049         from += copy;
1050         copied += copy;
1051         len -= copy;
1052         skb->len += copy;
1053         skb->free = 0;
1054         sk->send_seq += copy;
1055 
1056         if (send_tmp != NULL && sk->packets_out) {
1057                 tcp_enqueue_partial(send_tmp, sk);
1058                 continue;
1059         }
1060         tcp_send_skb(sk, skb);
1061   }
1062   sk->err = 0;
1063 
1064 /*
1065  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1066  *      interactive fast network servers. It's meant to be on and
1067  *      it really improves the throughput though not the echo time
1068  *      on my slow slip link - Alan
1069  */
1070 
1071   /* Avoid possible race on send_tmp - c/o Johannes Stille */
1072   if(sk->partial && 
1073      ((!sk->packets_out) 
1074      /* If not nagling we can send on the before case too.. */
1075       || (sk->nonagle && before(sk->send_seq , sk->window_seq))
1076       ))
1077         tcp_send_partial(sk);
1078   /* -- */
1079   release_sock(sk);
1080   DPRINTF((DBG_TCP, "tcp_write: return 8\n"));
1081   return(copied);
1082 }
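/*
 * Roughly speaking, with Nagle left on (sk->nonagle clear) a caller
 * that writes one byte at a time while earlier data is still
 * unacknowledged only grows sk->partial; nothing more is sent until
 * the outstanding data is acked or a full mss accumulates.  With
 * TCP_NODELAY set, the partial frame is pushed out as soon as the
 * offered window has room for it.
 */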
1083 
1084 
1085 static int
1086 tcp_sendto(struct sock *sk, unsigned char *from,
1087            int len, int nonblock, unsigned flags,
1088            struct sockaddr_in *addr, int addr_len)
1089 {
1090   struct sockaddr_in sin;
1091 
1092   if (addr_len < sizeof(sin)) return(-EINVAL);
1093   memcpy_fromfs(&sin, addr, sizeof(sin));
1094   if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL);
1095   if (sin.sin_port != sk->dummy_th.dest) return(-EINVAL);
1096   if (sin.sin_addr.s_addr != sk->daddr) return(-EINVAL);
1097   return(tcp_write(sk, from, len, nonblock, flags));
1098 }
1099 
1100 
1101 static void
1102 tcp_read_wakeup(struct sock *sk)
1103 {
1104   int tmp;
1105   struct device *dev = NULL;
1106   struct tcphdr *t1;
1107   struct sk_buff *buff;
1108 
1109   DPRINTF((DBG_TCP, "in tcp read wakeup\n"));
1110   if (!sk->ack_backlog) return;
1111 
1112   /*
1113    * FIXME: we need to put code here to prevent this routine from
1114    * being called.  Being called once in a while is ok, so only check
1115    * if this is the second time in a row.
1116    */
1117 
1118   /*
1119    * We need to grab some memory, and put together an ack,
1120    * and then put it into the queue to be sent.
1121    */
1122   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1123   if (buff == NULL) {
1124         /* Try again real soon. */
1125         reset_timer(sk, TIME_WRITE, 10);
1126         return;
1127   }
1128 
1129   buff->mem_addr = buff;
1130   buff->mem_len = MAX_ACK_SIZE;
1131   buff->len = sizeof(struct tcphdr);
1132   buff->sk = sk;
1133 
1134   /* Put in the IP header and routing stuff. */
1135   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1136                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1137   if (tmp < 0) {
1138         buff->free=1;
1139         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1140         return;
1141   }
1142 
1143   buff->len += tmp;
1144   t1 =(struct tcphdr *)(buff->data +tmp);
1145 
1146   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1147   t1->seq = ntohl(sk->send_seq);
1148   t1->ack = 1;
1149   t1->res1 = 0;
1150   t1->res2 = 0;
1151   t1->rst = 0;
1152   t1->urg = 0;
1153   t1->syn = 0;
1154   t1->psh = 0;
1155   sk->ack_backlog = 0;
1156   sk->bytes_rcv = 0;
1157   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1158   t1->window = ntohs(sk->window);
1159   t1->ack_seq = ntohl(sk->acked_seq);
1160   t1->doff = sizeof(*t1)/4;
1161   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1162   sk->prot->queue_xmit(sk, dev, buff, 1);
1163 }
1164 
1165 
1166 /*
1167  * FIXME:
1168  * This routine frees used buffers.
1169  * It should consider sending an ACK to let the
1170  * other end know we now have a bigger window.
1171  */
1172 static void
1173 cleanup_rbuf(struct sock *sk)
1174 {
1175   unsigned long flags;
1176   int left;
1177   struct sk_buff *skb;
1178 
1179   if(sk->debug)
1180         printk("cleaning rbuf for sk=%p\n", sk);
1181   
1182   save_flags(flags);
1183   cli();
1184   
1185   left = sk->prot->rspace(sk);
1186  
1187   /*
1188    * We have to loop through all the buffer headers,
1189    * and try to free up all the space we can.
1190    */
1191   while((skb=skb_peek(&sk->rqueue)) != NULL ) 
1192   {
1193         if (!skb->used) 
1194                 break;
1195         skb_unlink(skb);
1196         skb->sk = sk;
1197         kfree_skb(skb, FREE_READ);
1198   }
1199 
1200   restore_flags(flags);
1201 
1202   /*
1203    * FIXME:
1204    * At this point we should send an ack if the difference
1205    * in the window, and the amount of space is bigger than
1206    * TCP_WINDOW_DIFF.
1207    */
1208   DPRINTF((DBG_TCP, "sk->window left = %d, sk->prot->rspace(sk)=%d\n",
1209                         sk->window - sk->bytes_rcv, sk->prot->rspace(sk)));
1210 
1211   if(sk->debug)
1212         printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk),
1213                                             left);
1214   if (sk->prot->rspace(sk) != left) 
1215   {
1216         /*
1217          * This area has caused the most trouble.  The current strategy
1218          * is to simply do nothing if the other end has room to send at
1219          * least 3 full packets, because the ack from those will auto-
1220          * matically update the window.  If the other end doesn't think
1221          * we have much space left, but we have room for at least 1 more
1222          * complete packet than it thinks we do, we will send an ack
1223          * immediately.  Otherwise we will wait up to .5 seconds in case
1224          * the user reads some more.
1225          */
1226         sk->ack_backlog++;
1227 /*
1228  * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1229  * if the other end is offering a window smaller than the agreed on MSS
1230  * (called sk->mtu here).  In theory there's no connection between send
1231  * and receive, and so no reason to think that they're going to send
1232  * small packets.  For the moment I'm using the hack of reducing the mss
1233  * only on the send side, so I'm putting mtu here.
1234  */
1235         if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) {
1236                 /* Send an ack right now. */
1237                 tcp_read_wakeup(sk);
1238         } else {
1239                 /* Force it to send an ack soon. */
1240                 int was_active = del_timer(&sk->timer);
1241                 if (!was_active || TCP_ACK_TIME < sk->timer.expires) {
1242                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1243                 } else
1244                         add_timer(&sk->timer);
1245         }
1246   }
1247 } 
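/*
 * A rough illustration of the policy above (numbers invented): with an
 * mtu of 1500, a window of 4000 and 3500 bytes already received, the
 * peer believes only 500 bytes remain.  If the reader has just drained
 * the queue so that rspace() reports 8000 bytes, 8000 > 500 + 1500 and
 * an ack advertising the larger window goes out immediately; if the
 * space had only crept up a little, the delayed ack timer is used
 * instead.
 */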
1248 
1249 
1250 /* Handle reading urgent data. */
1251 static int
1252 tcp_read_urg(struct sock * sk, int nonblock,
1253              unsigned char *to, int len, unsigned flags)
1254 {
1255   int copied = 0;
1256   struct sk_buff *skb;
1257 
1258   DPRINTF((DBG_TCP, "tcp_read_urg(sk=%X, to=%X, len=%d, flags=%X)\n",
1259                                         sk, to, len, flags));
1260 
1261   while(len > 0) 
1262   {
1263         sk->inuse = 1;
1264         while(sk->urg==0 || skb_peek(&sk->rqueue) == NULL) {
1265                 if (sk->err) {
1266                         int tmp;
1267 
1268                         release_sock(sk);
1269                         if (copied) return(copied);
1270                         tmp = -sk->err;
1271                         sk->err = 0;
1272                         return(tmp);
1273                 }
1274 
1275                 if (sk->state == TCP_CLOSE || sk->done) {
1276                         release_sock(sk);
1277                         if (copied) return(copied);
1278                         if (!sk->done) {
1279                                 sk->done = 1;
1280                                 return(0);
1281                         }
1282                         return(-ENOTCONN);
1283                 }
1284                  
1285                 if (sk->shutdown & RCV_SHUTDOWN) {
1286                         release_sock(sk);
1287                         if (copied == 0) 
1288                                 sk->done = 1;
1289                         return(copied);
1290                 }
1291 
1292                 if (nonblock || copied) {
1293                         release_sock(sk);
1294                         if (copied) return(copied);
1295                         return(-EAGAIN);
1296                 }
1297 
1298                 /* Now at this point, we may have gotten some data. */
1299                 release_sock(sk);
1300                 cli();
1301                 if ((sk->urg == 0 || skb_peek(&sk->rqueue) == NULL) &&
1302                     sk->err == 0 && !(sk->shutdown & RCV_SHUTDOWN)) {
1303                         interruptible_sleep_on(sk->sleep);
1304                         if (current->signal & ~current->blocked) {
1305                                 sti();
1306                                 if (copied) return(copied);
1307                                 return(-ERESTARTSYS);
1308                         }
1309                 }
1310                 sk->inuse = 1;
1311                 sti();
1312         }
1313 
1314         skb = skb_peek(&sk->rqueue);
1315         do {
1316                 int amt;
1317 
1318                 if (before(sk->copied_seq+1, skb->h.th->seq))
1319                         break;
1320                 if (skb->h.th->urg && !skb->urg_used) {
1321                         if (skb->h.th->urg_ptr == 0) {
1322                                 skb->h.th->urg_ptr = ntohs(skb->len);
1323                         }
1324                         amt = min(ntohs(skb->h.th->urg_ptr),len);
1325                         if(amt)
1326                         {
1327                                 memcpy_tofs(to,(unsigned char *)(skb->h.th) +
1328                                                         skb->h.th->doff*4, amt);
1329                         }
1330 
1331                         if (!(flags & MSG_PEEK)) {
1332                                 skb->urg_used = 1;
1333                                 sk->urg--;
1334                         }
1335                         cleanup_rbuf(sk);
1336                         release_sock(sk);
1337                         copied += amt;
1338                         return(copied);
1339                 }
1340                 skb =(struct sk_buff *)skb->next;
1341         } while(skb != sk->rqueue);
1342   }
1343 /*sk->urg = 0;*/
1344   cleanup_rbuf(sk);
1345   release_sock(sk);
1346   return(0);
1347 }
1348 
1349 
1350 /* This routine copies from a sock struct into the user buffer. */
1351 static int
1352 tcp_read(struct sock *sk, unsigned char *to,
1353          int len, int nonblock, unsigned flags)
1354 {
1355   int copied=0; /* will be used to say how much has been copied. */
1356   struct sk_buff *skb;
1357   unsigned long offset;
1358   unsigned long used;
1359   int err;
1360 
1361   if (len == 0) return(0);
1362   if (len < 0) {
1363         return(-EINVAL);
1364   }
1365     
1366   err=verify_area(VERIFY_WRITE,to,len);
1367   if(err)
1368         return err;
1369         
1370   /* This error should be checked. */
1371   if (sk->state == TCP_LISTEN) return(-ENOTCONN);
1372 
1373   /* Urgent data needs to be handled specially. */
1374   if ((flags & MSG_OOB)) 
1375         return(tcp_read_urg(sk, nonblock, to, len, flags));
1376 
1377   /* So no-one else will use this socket. */
1378   sk->inuse = 1;
1379   
1380   skb=skb_peek(&sk->rqueue);
1381 
1382   DPRINTF((DBG_TCP, "tcp_read(sk=%X, to=%X, len=%d, nonblock=%d, flags=%X)\n",
1383                                                 sk, to, len, nonblock, flags));
1384 
1385   while(len > 0) {
1386         /* skb->used just checks to see if we've gone all the way around. */
1387         
1388         /* While no data, or first data indicates some is missing, or data is used */
1389         while(skb == NULL ||
1390               before(sk->copied_seq+1, skb->h.th->seq) || skb->used) {
1391                 DPRINTF((DBG_TCP, "skb = %X:\n", skb));
1392                 cleanup_rbuf(sk);
1393                 if (sk->err) 
1394                 {
1395                         int tmp;
1396 
1397                         release_sock(sk);
1398                         if (copied) 
1399                         {
1400                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1401                                                                         copied));
1402                                 return(copied);
1403                         }
1404                         tmp = -sk->err;
1405                         sk->err = 0;
1406                         return(tmp);
1407                 }
1408 
1409                 if (sk->state == TCP_CLOSE) 
1410                 {
1411                         release_sock(sk);
1412                         if (copied) {
1413                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1414                                                                 copied));
1415                                 return(copied);
1416                         }
1417                         if (!sk->done) {
1418                                 sk->done = 1;
1419                                 return(0);
1420                         }
1421                         return(-ENOTCONN);
1422                 }
1423 
1424                 if (sk->shutdown & RCV_SHUTDOWN) 
1425                 {
1426                         release_sock(sk);
1427                         if (copied == 0) sk->done = 1;
1428                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1429                         return(copied);
1430                 }
1431                         
1432                 if (nonblock || copied) 
1433                 {
1434                         release_sock(sk);
1435                         if(sk->debug)
1436                                 printk("read: EAGAIN\n");
1437                         if (copied) 
1438                         {
1439                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1440                                                                 copied));
1441                                 return(copied);
1442                         }
1443                         return(-EAGAIN);
1444                 }
1445 
1446                 if ((flags & MSG_PEEK) && copied != 0) 
1447                 {
1448                         release_sock(sk);
1449                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1450                         return(copied);
1451                 }
1452                  
1453                 DPRINTF((DBG_TCP, "tcp_read about to sleep. state = %d\n",
1454                                                                 sk->state));
1455                 release_sock(sk);
1456 
1457                 /*
1458                  * Now we may have some data waiting or we could
1459                  * have changed state.
1460                  */
1461                 cli();
1462                 if (sk->shutdown & RCV_SHUTDOWN || sk->err != 0) {
1463                         sk->inuse = 1;
1464                         sti();
1465                         continue;
1466                 }
1467 
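                     /*
                      * The cli() above and the re-check below are the usual
                      * sleep idiom here: interrupts stay disabled until
                      * interruptible_sleep_on() has put us on sk->sleep, so
                      * a wakeup from the receive side cannot slip in between
                      * the test and the sleep.
                      */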
1468                 if (skb_peek(&sk->rqueue) == NULL ||
1469                     before(sk->copied_seq+1, sk->rqueue->h.th->seq)) {
1470                         if(sk->debug)
1471                                 printk("Read wait sleep\n");
1472                         interruptible_sleep_on(sk->sleep);
1473                         if(sk->debug)
1474                                 printk("Read wait wakes\n");
1475                         if (current->signal & ~current->blocked) {
1476                                 sti();
1477                                 if (copied) {
1478                                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1479                                                                 copied));
1480                                         return(copied);
1481                                 }
1482                                 return(-ERESTARTSYS);
1483                         }
1484                 }
1485                 sk->inuse = 1;
1486                 sti();
1487                 DPRINTF((DBG_TCP, "tcp_read woke up. \n"));
1488 
1489 
1490                 skb=skb_peek(&sk->rqueue);
1491                 /* That may have been NULL if we were beaten to it; if so we loop again */
1492         }
1493 
1494         /*
1495          * Copy anything from the current block that needs
1496          * to go into the user buffer.
1497          */
1498          offset = sk->copied_seq+1 - skb->h.th->seq;
1499   
1500          if (skb->h.th->syn) offset--;
1501          if (offset < skb->len) /* Some of the packet is useful */
1502          {
1503                 /*
1504                  * If there is urgent data we must either
1505                  * return or skip over it.
1506                  */
1507                 if (skb->h.th->urg) 
1508                 {
1509                         if (skb->urg_used) 
1510                         {
1511                                 sk->copied_seq += ntohs(skb->h.th->urg_ptr);
1512                                 offset += ntohs(skb->h.th->urg_ptr);
1513                                 if (offset >= skb->len) 
1514                                 {
1515                                         skb->used = 1;
1516                                         skb =(struct sk_buff *)skb->next;
1517                                         continue;
1518                                 }
1519                         } 
1520                         else 
1521                         {
1522                                 release_sock(sk);
1523                                 if (copied) 
1524                                         return(copied);
1525                                 send_sig(SIGURG, current, 0);
1526                                 return(-EINTR);
1527                         }
1528                 }
1529                 /* Ok so how much can we use ? */
1530                 used = min(skb->len - offset, len);
1531                 /* Copy it */
1532                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1533                             skb->h.th->doff*4 + offset, used);
1534                 copied += used;
1535                 len -= used;
1536                 to += used;
1537                 
1538                 /* If we were really reading (not peeking), the data is 'eaten' */
1539                 if (!(flags & MSG_PEEK)) 
1540                         sk->copied_seq += used;
1541               
1542                 /*
1543                  * Mark this data used if we are really reading it,
1544                  * if it doesn't contain any urgent data, and if we
1545                  * have used all of the data.
1546                  */
1547                 if (!(flags & MSG_PEEK) &&
1548                    (!skb->h.th->urg || skb->urg_used) &&
1549                    (used + offset >= skb->len)) 
1550                         skb->used = 1;
1551               
1552                 /*
1553                  * See if this is the end of a message or if the
1554                  * remaining data is urgent.
1555                  */
1556                 if (/*skb->h.th->psh || */skb->h.th->urg) 
1557                 {
1558                         break;
1559                 }
1560         } 
1561         else 
1562         {       /* already used this data, must be a retransmit */
1563                 skb->used = 1;
1564         }
1565         /* Move along a packet */
1566         skb =(struct sk_buff *)skb->next;
1567   }
1568   /* Clean up data we have read: This will do ACK frames */
1569   cleanup_rbuf(sk);
1570   release_sock(sk);
1571   DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1572   if (copied == 0 && nonblock) 
1573         return(-EAGAIN);
1574   return(copied);
1575 }
1576 
1577   
1578 /*
1579  * Send a FIN without closing the connection.
1580  * Not called at interrupt time.
1581  */
1582 void
1583 tcp_shutdown(struct sock *sk, int how)
1584 {
1585   struct sk_buff *buff;
1586   struct tcphdr *t1, *th;
1587   struct proto *prot;
1588   int tmp;
1589   struct device *dev = NULL;
1590 
1591   /*
1592    * We need to grab some memory, and put together a FIN,
1593    * and then put it into the queue to be sent.
1594    * FIXME:
1595    *    Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1596    *    Most of this is guesswork, so maybe it will work...
1597    */
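     /*
      * In outline: flush any partial packet, build a FIN|ACK from
      * sk->dummy_th, append it to the tail of the write queue (or
      * transmit it at once if the queue is empty), and move to
      * FIN_WAIT1, or FIN_WAIT2 if we were not ESTABLISHED.
      */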
1598   /* If we've already sent a FIN, return. */
1599   if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) return;
1600   if (!(how & SEND_SHUTDOWN)) return;
1601   sk->inuse = 1;
1602 
1603   /* Clear out any half completed packets. */
1604   if (sk->partial)
1605         tcp_send_partial(sk);
1606 
1607   prot =(struct proto *)sk->prot;
1608   th =(struct tcphdr *)&sk->dummy_th;
1609   release_sock(sk); /* in case the malloc sleeps. */
1610   buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1611   if (buff == NULL) return;
1612   sk->inuse = 1;
1613 
1614   DPRINTF((DBG_TCP, "tcp_shutdown_send buff = %X\n", buff));
1615   buff->mem_addr = buff;
1616   buff->mem_len = MAX_RESET_SIZE;
1617   buff->sk = sk;
1618   buff->len = sizeof(*t1);
1619   t1 =(struct tcphdr *) buff->data;
1620 
1621   /* Put in the IP header and routing stuff. */
1622   tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1623                            IPPROTO_TCP, sk->opt,
1624                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1625   if (tmp < 0) {
1626         buff->free=1;
1627         prot->wfree(sk,buff->mem_addr, buff->mem_len);
1628         release_sock(sk);
1629         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
1630         return;
1631   }
1632 
1633   t1 =(struct tcphdr *)((char *)t1 +tmp);
1634   buff->len += tmp;
1635   buff->dev = dev;
1636   memcpy(t1, th, sizeof(*t1));
1637   t1->seq = ntohl(sk->send_seq);
1638   sk->send_seq++;
1639   buff->h.seq = sk->send_seq;
1640   t1->ack = 1;
1641   t1->ack_seq = ntohl(sk->acked_seq);
1642   t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1643   t1->fin = 1;
1644   t1->rst = 0;
1645   t1->doff = sizeof(*t1)/4;
1646   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1647 
1648   /*
1649    * Can't just queue this up.
1650    * It should go at the end of the write queue.
1651    */
1652   if (sk->wback != NULL) {
1653         buff->free=0;   
1654         buff->next = NULL;
1655         sk->wback->next = buff;
1656         sk->wback = buff;
1657         buff->magic = TCP_WRITE_QUEUE_MAGIC;
1658   } else {
1659         sk->prot->queue_xmit(sk, dev, buff, 0);
1660   }
1661 
1662   if (sk->state == TCP_ESTABLISHED) sk->state = TCP_FIN_WAIT1;
1663     else sk->state = TCP_FIN_WAIT2;
1664 
1665   release_sock(sk);
1666 }
1667 
1668 
1669 static int
1670 tcp_recvfrom(struct sock *sk, unsigned char *to,
1671              int to_len, int nonblock, unsigned flags,
1672              struct sockaddr_in *addr, int *addr_len)
1673 {
1674   struct sockaddr_in sin;
1675   int len;
1676   int err;
1677   int result;
1678   
1679   /* Have to check these first, unlike the old code. If
1680      we check them after the read, we lose data on an
1681      error, which is wrong. */
1682   err = verify_area(VERIFY_WRITE,addr_len,sizeof(long));
1683   if(err)
1684         return err;
1685   len = get_fs_long(addr_len);
1686   if(len > sizeof(sin))
1687         len = sizeof(sin);
1688   err=verify_area(VERIFY_WRITE, addr, len);  
1689   if(err)
1690         return err;
1691         
1692   result=tcp_read(sk, to, to_len, nonblock, flags);
1693 
1694   if (result < 0) return(result);
1695   
1696   sin.sin_family = AF_INET;
1697   sin.sin_port = sk->dummy_th.dest;
1698   sin.sin_addr.s_addr = sk->daddr;
1699 
1700   memcpy_tofs(addr, &sin, len);
1701   put_fs_long(len, addr_len);
1702   return(result);
1703 }
1704 
1705 
1706 /* This routine will send an RST to the other tcp. */
1707 static void
1708 tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
1709           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1710 {
1711   struct sk_buff *buff;
1712   struct tcphdr *t1;
1713   int tmp;
1714 
1715   /*
1716    * We need to grab some memory, and put together an RST,
1717    * and then put it into the queue to be sent.
1718    */
1719   buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1720   if (buff == NULL) 
1721         return;
1722 
1723   DPRINTF((DBG_TCP, "tcp_reset buff = %X\n", buff));
1724   buff->mem_addr = buff;
1725   buff->mem_len = MAX_RESET_SIZE;
1726   buff->len = sizeof(*t1);
1727   buff->sk = NULL;
1728   buff->dev = dev;
1729 
1730   t1 =(struct tcphdr *) buff->data;
1731 
1732   /* Put in the IP header and routing stuff. */
1733   tmp = prot->build_header(buff, saddr, daddr, &dev, IPPROTO_TCP, opt,
1734                            sizeof(struct tcphdr),tos,ttl);
1735   if (tmp < 0) {
1736         buff->free = 1;
1737         prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1738         return;
1739   }
1740   t1 =(struct tcphdr *)((char *)t1 +tmp);
1741   buff->len += tmp;
1742   memcpy(t1, th, sizeof(*t1));
1743 
1744   /* Swap the send and the receive. */
1745   t1->dest = th->source;
1746   t1->source = th->dest;
1747   t1->rst = 1;  
1748   t1->window = 0;
1749   
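       /*
        * Sequence/ack selection below follows the RFC 793 reset rules:
        * if the offending segment carried an ACK, the reset uses that
        * ack_seq as its own sequence and clears the ACK bit; otherwise
        * it is sent with sequence zero and the ACK bit set, acking the
        * segment's seq (plus one if it was a SYN).
        */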
1750   if(th->ack)
1751   {
1752         t1->ack=0;
1753         t1->seq=th->ack_seq;
1754         t1->ack_seq=0;
1755   }
1756   else
1757   {
1758         t1->ack=1;
1759         if(!th->syn)
1760                 t1->ack_seq=htonl(th->seq);
1761         else
1762                 t1->ack_seq=htonl(th->seq+1);
1763         t1->seq=0;
1764   }
1765 
1766   t1->syn = 0;
1767   t1->urg = 0;
1768   t1->fin = 0;
1769   t1->psh = 0;
1770   t1->doff = sizeof(*t1)/4;
1771   tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1772   prot->queue_xmit(NULL, dev, buff, 1);
1773 }
1774 
1775 
1776 /*
1777  *      Look for tcp options. Parses everything but only knows about MSS.
1778  *      This routine is always called with the packet containing the SYN.
1779  *      However it may also be called with the ack to the SYN.  So you
1780  *      can't assume this is always the SYN.  It's always called after
1781  *      we have set up sk->mtu to our own MTU.
1782  */
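     /*
      * For reference, the only option acted on here is MSS, which on
      * the wire is kind=2 (TCPOPT_MSS), length=4, then a 16-bit value
      * in network byte order -- the same four bytes tcp_conn_request()
      * writes into its SYN/ACK below.
      */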
1783  
1784 static void
1785 tcp_options(struct sock *sk, struct tcphdr *th)
1786 {
1787   unsigned char *ptr;
1788   int length=(th->doff*4)-sizeof(struct tcphdr);
1789   int mss_seen = 0;
1790     
1791   ptr = (unsigned char *)(th + 1);
1792   
1793   while(length>0)
1794   {
1795         int opcode=*ptr++;
1796         int opsize=*ptr++;
1797         switch(opcode)
1798         {
1799                 case TCPOPT_EOL:
1800                         return;
1801                 case TCPOPT_NOP:
1802                         length-=2;
1803                         continue;
1804                 
1805                 default:
1806                         if(opsize<=2)   /* Avoid silly options looping forever */
1807                                 return;
1808                         switch(opcode)
1809                         {
1810                                 case TCPOPT_MSS:
1811                                         if(opsize==4 && th->syn)
1812                                         {
1813                                                 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1814                                                 mss_seen = 1;
1815                                         }
1816                                         break;
1817                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1818                         }
1819                         ptr+=opsize-2;
1820                         length-=opsize;
1821         }
1822   }
1823   if (th->syn) {
1824     if (! mss_seen)
1825       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1826   }
1827   sk->mss = min(sk->max_window, sk->mtu);
1828 }
1829 
1830 static inline unsigned long default_mask(unsigned long dst)
1831 {
1832         dst = ntohl(dst);
1833         if (IN_CLASSA(dst))
1834                 return htonl(IN_CLASSA_NET);
1835         if (IN_CLASSB(dst))
1836                 return htonl(IN_CLASSB_NET);
1837         return htonl(IN_CLASSC_NET);
1838 }
1839 
1840 /*
1841  * This routine handles a connection request.
1842  * It should make sure we haven't already responded.
1843  * Because of the way BSD works, we have to send a syn/ack now.
1844  * This also means it will be harder to close a socket which is
1845  * listening.
1846  */
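     /*
      * In outline: clone the listening sock into newsk (state
      * TCP_SYN_RECV), clamp its mtu/mss via tcp_options(), reply with a
      * SYN|ACK carrying our MSS option, then charge the original SYN skb
      * to newsk and queue it on the listener's rqueue for accept() to
      * pick up later.
      */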
1847 static void
1848 tcp_conn_request(struct sock *sk, struct sk_buff *skb,
1849                  unsigned long daddr, unsigned long saddr,
1850                  struct options *opt, struct device *dev)
1851 {
1852   struct sk_buff *buff;
1853   struct tcphdr *t1;
1854   unsigned char *ptr;
1855   struct sock *newsk;
1856   struct tcphdr *th;
1857   int tmp;
1858 
1859   DPRINTF((DBG_TCP, "tcp_conn_request(sk = %X, skb = %X, daddr = %X, saddr = %X, \n"
1860           "                  opt = %X, dev = %X)\n",
1861           sk, skb, daddr, saddr, opt, dev));
1862   
1863   th = skb->h.th;
1864 
1865   /* If the socket is dead, don't accept the connection. */
1866   if (!sk->dead) {
1867         sk->data_ready(sk,0);
1868   } else {
1869         DPRINTF((DBG_TCP, "tcp_conn_request on dead socket\n"));
1870         tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1871         kfree_skb(skb, FREE_READ);
1872         return;
1873   }
1874 
1875   /*
1876    * Make sure we can accept more.  This will prevent a
1877    * flurry of syns from eating up all our memory.
1878    */
1879   if (sk->ack_backlog >= sk->max_ack_backlog) {
1880         kfree_skb(skb, FREE_READ);
1881         return;
1882   }
1883 
1884   /*
1885    * We need to build a new sock struct.
1886    * It is sort of bad to have a socket without an inode attached
1887    * to it, but the wake_up's will just wake up the listening socket,
1888    * and if the listening socket is destroyed before this is taken
1889    * off of the queue, this will take care of it.
1890    */
1891   newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1892   if (newsk == NULL) {
1893         /* just ignore the syn.  It will get retransmitted. */
1894         kfree_skb(skb, FREE_READ);
1895         return;
1896   }
1897 
1898   DPRINTF((DBG_TCP, "newsk = %X\n", newsk));
1899   memcpy((void *)newsk,(void *)sk, sizeof(*newsk));
1900   newsk->wback = NULL;
1901   newsk->wfront = NULL;
1902   newsk->rqueue = NULL;
1903   newsk->send_head = NULL;
1904   newsk->send_tail = NULL;
1905   newsk->back_log = NULL;
1906   newsk->rtt = TCP_CONNECT_TIME << 3;
1907   newsk->rto = TCP_CONNECT_TIME;
1908   newsk->mdev = 0;
1909   newsk->max_window = 0;
1910   newsk->cong_window = 1;
1911   newsk->cong_count = 0;
1912   newsk->ssthresh = 0;
1913   newsk->backoff = 0;
1914   newsk->blog = 0;
1915   newsk->intr = 0;
1916   newsk->proc = 0;
1917   newsk->done = 0;
1918   newsk->partial = NULL;
1919   newsk->pair = NULL;
1920   newsk->wmem_alloc = 0;
1921   newsk->rmem_alloc = 0;
1922 
1923   newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1924 
1925   newsk->err = 0;
1926   newsk->shutdown = 0;
1927   newsk->ack_backlog = 0;
1928   newsk->acked_seq = skb->h.th->seq+1;
1929   newsk->fin_seq = skb->h.th->seq;
1930   newsk->copied_seq = skb->h.th->seq;
1931   newsk->state = TCP_SYN_RECV;
1932   newsk->timeout = 0;
1933   newsk->send_seq = jiffies * SEQ_TICK - seq_offset;
1934   newsk->window_seq = newsk->send_seq;
1935   newsk->rcv_ack_seq = newsk->send_seq;
1936   newsk->urg =0;
1937   newsk->retransmits = 0;
1938   newsk->destroy = 0;
1939   newsk->timer.data = (unsigned long)newsk;
1940   newsk->timer.function = &net_timer;
1941   newsk->dummy_th.source = skb->h.th->dest;
1942   newsk->dummy_th.dest = skb->h.th->source;
1943 
1944   /* Swap these two, they are from our point of view. */
1945   newsk->daddr = saddr;
1946   newsk->saddr = daddr;
1947 
1948   put_sock(newsk->num,newsk);
1949   newsk->dummy_th.res1 = 0;
1950   newsk->dummy_th.doff = 6;
1951   newsk->dummy_th.fin = 0;
1952   newsk->dummy_th.syn = 0;
1953   newsk->dummy_th.rst = 0;
1954   newsk->dummy_th.psh = 0;
1955   newsk->dummy_th.ack = 0;
1956   newsk->dummy_th.urg = 0;
1957   newsk->dummy_th.res2 = 0;
1958   newsk->acked_seq = skb->h.th->seq + 1;
1959   newsk->copied_seq = skb->h.th->seq;
1960 
1961   /* Grab the ttl and tos values and use them */
1962   newsk->ip_ttl=sk->ip_ttl;
1963   newsk->ip_tos=skb->ip_hdr->tos;
1964 
1965 /* use 512 or whatever user asked for */
1966 /* note use of sk->user_mss, since user has no direct access to newsk */
1967   if (sk->user_mss)
1968     newsk->mtu = sk->user_mss;
1969   else {
1970 #ifdef SUBNETSARELOCAL
1971     if ((saddr ^ daddr) & default_mask(saddr))
1972 #else
1973     if ((saddr ^ daddr) & dev->pa_mask)
1974 #endif
1975       newsk->mtu = 576 - HEADER_SIZE;
1976     else
1977       newsk->mtu = MAX_WINDOW;
1978   }
1979 /* but not bigger than device MTU */
1980   newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
1981 
1982 /* this will min with what arrived in the packet */
1983   tcp_options(newsk,skb->h.th);
1984 
1985   buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
1986   if (buff == NULL) {
1987         sk->err = -ENOMEM;
1988         newsk->dead = 1;
1989         release_sock(newsk);
1990         kfree_skb(skb, FREE_READ);
1991         return;
1992   }
1993   
1994   buff->mem_addr = buff;
1995   buff->mem_len = MAX_SYN_SIZE;
1996   buff->len = sizeof(struct tcphdr)+4;
1997   buff->sk = newsk;
1998   
1999   t1 =(struct tcphdr *) buff->data;
2000 
2001   /* Put in the IP header and routing stuff. */
2002   tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &dev,
2003                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2004 
2005   /* Something went wrong. */
2006   if (tmp < 0) {
2007         sk->err = tmp;
2008         buff->free=1;
2009         kfree_skb(buff,FREE_WRITE);
2010         newsk->dead = 1;
2011         release_sock(newsk);
2012         skb->sk = sk;
2013         kfree_skb(skb, FREE_READ);
2014         return;
2015   }
2016 
2017   buff->len += tmp;
2018   t1 =(struct tcphdr *)((char *)t1 +tmp);
2019   
2020   memcpy(t1, skb->h.th, sizeof(*t1));
2021   buff->h.seq = newsk->send_seq;
2022 
2023   /* Swap the send and the receive. */
2024   t1->dest = skb->h.th->source;
2025   t1->source = newsk->dummy_th.source;
2026   t1->seq = ntohl(newsk->send_seq++);
2027   t1->ack = 1;
2028   newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2029   t1->window = ntohs(newsk->window);
2030   t1->res1 = 0;
2031   t1->res2 = 0;
2032   t1->rst = 0;
2033   t1->urg = 0;
2034   t1->psh = 0;
2035   t1->syn = 1;
2036   t1->ack_seq = ntohl(skb->h.th->seq+1);
2037   t1->doff = sizeof(*t1)/4+1;
2038 
2039   ptr =(unsigned char *)(t1+1);
2040   ptr[0] = 2;
2041   ptr[1] = 4;
2042   ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2043   ptr[3] =(newsk->mtu) & 0xff;
2044 
2045   tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2046   newsk->prot->queue_xmit(newsk, dev, buff, 0);
2047 
2048   reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_CONNECT_TIME);
2049   skb->sk = newsk;
2050 
2051   /* Charge the sock_buff to newsk. */
2052   sk->rmem_alloc -= skb->mem_len;
2053   newsk->rmem_alloc += skb->mem_len;
2054 
2055   skb_queue_tail(&sk->rqueue,skb);
2056   sk->ack_backlog++;
2057   release_sock(newsk);
2058 }
2059 
2060 
2061 static void
2062 tcp_close(struct sock *sk, int timeout)
2063 {
2064   struct sk_buff *buff;
2065   int need_reset = 0;
2066   struct tcphdr *t1, *th;
2067   struct proto *prot;
2068   struct device *dev=NULL;
2069   int tmp;
2070 
2071   /*
2072    * We need to grab some memory, and put together a FIN,
2073    * and then put it into the queue to be sent.
2074    */
2075   DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));
2076   sk->inuse = 1;
2077   sk->keepopen = 1;
2078   sk->shutdown = SHUTDOWN_MASK;
2079 
2080   if (!sk->dead) 
2081         sk->state_change(sk);
2082 
2083   /* We need to flush the recv. buffs. */
2084   if (skb_peek(&sk->rqueue) != NULL) 
2085   {
2086         struct sk_buff *skb;
2087         if(sk->debug)
2088                 printk("Clean rcv queue\n");
2089         while((skb=skb_dequeue(&sk->rqueue))!=NULL)
2090         {
2091                 if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
2092                                 need_reset = 1;
2093                 kfree_skb(skb, FREE_READ);
2094         }
2095         if(sk->debug)
2096                 printk("Cleaned.\n");
2097   }
2098   sk->rqueue = NULL;
2099 
2100   /* Get rid of any half-completed packets. */
2101   if (sk->partial) {
2102         tcp_send_partial(sk);
2103   }
2104 
2105   switch(sk->state) {
2106         case TCP_FIN_WAIT1:
2107         case TCP_FIN_WAIT2:
2108         case TCP_LAST_ACK:
2109                 /* start a timer. */
2110                 /* Original code was 4 * sk->rtt.  In converting to the
2111                  * new rtt representation, we can't quite use that.
2112                  * It seems to make most sense to use the backed off value.
2113                  */
2114                 reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2115                 if (timeout) tcp_time_wait(sk);
2116                 release_sock(sk);
2117                 return; /* break causes a double release - messy */
2118         case TCP_TIME_WAIT:
2119                 if (timeout) {
2120                   sk->state = TCP_CLOSE;
2121                 }
2122                 release_sock(sk);
2123                 return;
2124         case TCP_LISTEN:
2125                 sk->state = TCP_CLOSE;
2126                 release_sock(sk);
2127                 return;
2128         case TCP_CLOSE:
2129                 release_sock(sk);
2130                 return;
2131         case TCP_CLOSE_WAIT:
2132         case TCP_ESTABLISHED:
2133         case TCP_SYN_SENT:
2134         case TCP_SYN_RECV:
2135                 prot =(struct proto *)sk->prot;
2136                 th =(struct tcphdr *)&sk->dummy_th;
2137                 buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2138                 if (buff == NULL) {
2139                         /* This will force it to try again later. */
2140                         /* Or it would have if someone released the socket
2141                            first. Anyway it might work now */
2142                         release_sock(sk);
2143                         if (sk->state != TCP_CLOSE_WAIT)
2144                                         sk->state = TCP_ESTABLISHED;
2145                         reset_timer(sk, TIME_CLOSE, 100);
2146                         return;
2147                 }
2148                 buff->mem_addr = buff;
2149                 buff->mem_len = MAX_FIN_SIZE;
2150                 buff->sk = sk;
2151                 buff->free = 1;
2152                 buff->len = sizeof(*t1);
2153                 t1 =(struct tcphdr *) buff->data;
2154 
2155                 /* Put in the IP header and routing stuff. */
2156                 tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2157                                          IPPROTO_TCP, sk->opt,
2158                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2159                 if (tmp < 0) {
2160                         kfree_skb(buff,FREE_WRITE);
2161                         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
2162                         release_sock(sk);
2163                         return;
2164                 }
2165 
2166                 t1 =(struct tcphdr *)((char *)t1 +tmp);
2167                 buff->len += tmp;
2168                 buff->dev = dev;
2169                 memcpy(t1, th, sizeof(*t1));
2170                 t1->seq = ntohl(sk->send_seq);
2171                 sk->send_seq++;
2172                 buff->h.seq = sk->send_seq;
2173                 t1->ack = 1;
2174 
2175                 /* Ack everything immediately from now on. */
2176                 sk->delay_acks = 0;
2177                 t1->ack_seq = ntohl(sk->acked_seq);
2178                 t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2179                 t1->fin = 1;
2180                 t1->rst = need_reset;
2181                 t1->doff = sizeof(*t1)/4;
2182                 tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2183 
2184                 if (sk->wfront == NULL) {
2185                         prot->queue_xmit(sk, dev, buff, 0);
2186                 } else {
2187                         reset_timer(sk, TIME_WRITE, sk->rto);
2188                         buff->next = NULL;
2189                         if (sk->wback == NULL) {
2190                                 sk->wfront = buff;
2191                         } else {
2192                                 sk->wback->next = buff;
2193                         }
2194                         sk->wback = buff;
2195                         buff->magic = TCP_WRITE_QUEUE_MAGIC;
2196                 }
2197 
2198                 if (sk->state == TCP_CLOSE_WAIT) {
2199                         sk->state = TCP_FIN_WAIT2;
2200                 } else {
2201                         sk->state = TCP_FIN_WAIT1;
2202                 }
2203   }
2204   release_sock(sk);
2205 }
2206 
2207 
2208 /*
2209  * This routine takes stuff off of the write queue,
2210  * and puts it in the xmit queue.
2211  */
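     /*
      * A segment is moved only while it fits inside the advertised
      * window (window_seq), we are not blocked behind a pending
      * retransmit of unacked data, and fewer than cong_window packets
      * are outstanding -- the same gate tcp_ack() re-checks before
      * calling us.
      */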
2212 static void
2213 tcp_write_xmit(struct sock *sk)
2214 {
2215   struct sk_buff *skb;
2216 
2217   DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));
2218 
2219   /* The bytes will have to remain here. In time closedown will
2220      empty the write queue and all will be happy */
2221   if(sk->zapped)
2222         return;
2223 
2224   while(sk->wfront != NULL &&
2225         before(sk->wfront->h.seq, sk->window_seq +1) &&
2226         (sk->retransmits == 0 ||
2227          sk->timeout != TIME_WRITE ||
2228          before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
2229         && sk->packets_out < sk->cong_window) {
2230                 skb = sk->wfront;
2231                 IS_SKB(skb);
2232                 sk->wfront = skb->next;
2233                 if (sk->wfront == NULL) sk->wback = NULL;
2234                 skb->next = NULL;
2235                 if (skb->magic != TCP_WRITE_QUEUE_MAGIC) {
2236                         printk("tcp.c skb with bad magic(%X) on write queue. Squashing "
2237                                 "queue\n", skb->magic);
2238                         sk->wfront = NULL;
2239                         sk->wback = NULL;
2240                         return;
2241                 }
2242                 skb->magic = 0;
2243                 DPRINTF((DBG_TCP, "Sending a packet.\n"));
2244 
2245                 /* See if we really need to send the packet. */
2246                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) {
2247                         sk->retransmits = 0;
2248                         kfree_skb(skb, FREE_WRITE);
2249                         if (!sk->dead) sk->write_space(sk);
2250                 } else {
2251                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2252                 }
2253         }
2254 }
2255 
2256 
2257 /*
2258  * This routine sorts the send list, and resets the
2259  * sk->send_head and sk->send_tail pointers.
2260  */
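     /* It is a plain insertion sort on the link3 chain, keyed on h.seq. */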
2261 void
2262 sort_send(struct sock *sk)
2263 {
2264   struct sk_buff *list = NULL;
2265   struct sk_buff *skb,*skb2,*skb3;
2266 
2267   for (skb = sk->send_head; skb != NULL; skb = skb2) {
2268         skb2 = (struct sk_buff *)skb->link3;
2269         if (list == NULL || before (skb2->h.seq, list->h.seq)) {
2270                 skb->link3 = list;
2271                 sk->send_tail = skb;
2272                 list = skb;
2273         } else {
2274                 for (skb3 = list; ; skb3 = (struct sk_buff *)skb3->link3) {
2275                         if (skb3->link3 == NULL ||
2276                             before(skb->h.seq, skb3->link3->h.seq)) {
2277                                 skb->link3 = skb3->link3;
2278                                 skb3->link3 = skb;
2279                                 if (skb->link3 == NULL) sk->send_tail = skb;
2280                                 break;
2281                         }
2282                 }
2283         }
2284   }
2285   sk->send_head = list;
2286 }
2287   
2288 
2289 /* This routine deals with incoming acks, but not outgoing ones. */
2290 static int
2291 tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
2292 {
2293   unsigned long ack;
2294   int flag = 0;
2295   /* 
2296    * 1 - there was data in packet as well as ack or new data is sent or 
2297    *     in shutdown state
2298    * 2 - data from retransmit queue was acked and removed
2299    * 4 - window shrunk or data from retransmit queue was acked and removed
2300    */
2301 
2302   if(sk->zapped)
2303         return(1);      /* Dead, can't ack any more, so why bother */
2304 
2305   ack = ntohl(th->ack_seq);
2306   DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
2307           "sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
2308           ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
2309 
2310   if (ntohs(th->window) > sk->max_window) {
2311         sk->max_window = ntohs(th->window);
2312         sk->mss = min(sk->max_window, sk->mtu);
2313   }
2314 
2315   if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2316         sk->retransmits = 0;
2317 
2318   if (after(ack, sk->send_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
2319         if (after(ack, sk->send_seq) ||
2320            (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
2321                 return(0);
2322         }
2323         if (sk->keepopen) {
2324                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2325         }
2326         return(1);
2327   }
2328 
2329   if (len != th->doff*4) flag |= 1;
2330 
2331   /* See if our window has been shrunk. */
2332   if (after(sk->window_seq, ack+ntohs(th->window))) {
2333         /*
2334          * We may need to move packets from the send queue
2335          * to the write queue, if the window has been shrunk on us.
2336          * The RFC says you are not allowed to shrink your window
2337          * like this, but if the other end does, you must be able
2338          * to deal with it.
2339          */
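             /*
              * Concretely: any segment already on the retransmit queue whose
              * sequence now falls beyond the new right edge (ack + window)
              * is unlinked from the device queue and pushed back, in order,
              * onto the front of the write queue to wait for the window to
              * reopen; the rest stay on send_head/send_tail.
              */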
2340         struct sk_buff *skb;
2341         struct sk_buff *skb2;
2342         struct sk_buff *wskb = NULL;
2343   
2344         skb2 = sk->send_head;
2345         sk->send_head = NULL;
2346         sk->send_tail = NULL;
2347 
2348         flag |= 4;
2349 
2350         sk->window_seq = ack + ntohs(th->window);
2351         cli();
2352         while (skb2 != NULL) {
2353                 skb = skb2;
2354                 skb2 = (struct sk_buff *)skb->link3;
2355                 skb->link3 = NULL;
2356                 if (after(skb->h.seq, sk->window_seq)) {
2357                         if (sk->packets_out > 0) sk->packets_out--;
2358                         /* We may need to remove this from the dev send list. */
2359                         if (skb->next != NULL) {
2360                                 skb_unlink(skb);                                
2361                         }
2362                         /* Now add it to the write_queue. */
2363                         skb->magic = TCP_WRITE_QUEUE_MAGIC;
2364                         if (wskb == NULL) {
2365                                 skb->next = sk->wfront;
2366                                 sk->wfront = skb;
2367                         } else {
2368                                 skb->next = wskb->next;
2369                                 wskb->next = skb;
2370                         }
2371                         if (sk->wback == wskb) sk->wback = skb;
2372                         wskb = skb;
2373                 } else {
2374                         if (sk->send_head == NULL) {
2375                                 sk->send_head = skb;
2376                                 sk->send_tail = skb;
2377                         } else {
2378                                 sk->send_tail->link3 = skb;
2379                                 sk->send_tail = skb;
2380                         }
2381                         skb->link3 = NULL;
2382                 }
2383         }
2384         sti();
2385   }
2386 
2387   if (sk->send_tail == NULL || sk->send_head == NULL) {
2388         sk->send_head = NULL;
2389         sk->send_tail = NULL;
2390         sk->packets_out= 0;
2391   }
2392 
2393   sk->window_seq = ack + ntohs(th->window);
2394 
2395   /* We don't want too many packets out there. */
2396   if (sk->timeout == TIME_WRITE && 
2397       sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {
2398 /* 
2399  * This is Jacobson's slow start and congestion avoidance. 
2400  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2401  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2402  * counter and increment it once every cwnd times.  It's possible
2403  * that this should be done only if sk->retransmits == 0.  I'm
2404  * interpreting "new data is acked" as including data that has
2405  * been retransmitted but is just now being acked.
2406  */
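             /*
              * For example, above ssthresh with cong_window = 4, four of
              * these acks must arrive before cong_window becomes 5 --
              * roughly one mss of growth per round trip -- whereas below
              * ssthresh every such ack grows the window by a full mss.
              */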
2407         if (sk->cong_window < sk->ssthresh)  
2408           /* in "safe" area, increase */
2409           sk->cong_window++;
2410         else {
2411           /* in dangerous area, increase slowly.  In theory this is
2412              sk->cong_window += 1 / sk->cong_window
2413            */
2414           if (sk->cong_count >= sk->cong_window) {
2415             sk->cong_window++;
2416             sk->cong_count = 0;
2417           } else 
2418             sk->cong_count++;
2419         }
2420   }
2421 
2422   DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
2423   sk->rcv_ack_seq = ack;
2424 
2425   /*
2426    * if this ack opens up a zero window, clear backoff.  It was
2427    * being used to time the probes, and is probably far higher than
2428    * it needs to be for normal retransmission
2429    */
2430   if (sk->timeout == TIME_PROBE0) {
2431         if (sk->wfront != NULL &&   /* should always be non-null */
2432             ! before (sk->window_seq, sk->wfront->h.seq)) {
2433           sk->retransmits = 0;
2434           sk->backoff = 0;
2435           /* recompute rto from rtt.  this eliminates any backoff */
2436           sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2437           if (sk->rto > 120*HZ)
2438             sk->rto = 120*HZ;
2439           if (sk->rto < 1*HZ)
2440             sk->rto = 1*HZ;
2441         }
2442   }
2443 
2444   /* See if we can take anything off of the retransmit queue. */
2445   while(sk->send_head != NULL) {
2446         /* Check for a bug. */
2447         if (sk->send_head->link3 &&
2448             after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
2449                 printk("INET: tcp.c: *** bug send_list out of order.\n");
2450                 sort_send(sk);
2451         }
2452 
2453         if (before(sk->send_head->h.seq, ack+1)) {
2454                 struct sk_buff *oskb;
2455 
2456                 if (sk->retransmits) {
2457 
2458                   /* we were retransmitting.  don't count this in RTT est */
2459                   flag |= 2;
2460 
2461                   /*
2462                    * even though we've gotten an ack, we're still
2463                    * retransmitting as long as we're sending from
2464                    * the retransmit queue.  Keeping retransmits non-zero
2465                    * prevents us from getting new data interspersed with
2466                    * retransmissions.
2467                    */
2468 
2469                   if (sk->send_head->link3)
2470                     sk->retransmits = 1;
2471                   else
2472                     sk->retransmits = 0;
2473 
2474                 }
2475 
2476                 /*
2477                  * Note that we only reset backoff and rto in the
2478                  * rtt recomputation code.  And that doesn't happen
2479                  * if there were retransmissions in effect.  So the
2480                  * first new packet after the retransmissions is
2481                  * sent with the backoff still in effect.  Not until
2482                  * we get an ack from a non-retransmitted packet do
2483                  * we reset the backoff and rto.  This allows us to deal
2484                  * with a situation where the network delay has increased
2485                  * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2486                  */
2487 
2488                 /* We have one less packet out there. */
2489                 if (sk->packets_out > 0) sk->packets_out --;
2490                 DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
2491                                 sk->send_head, sk->send_head->h.seq, ack));
2492 
2493                 /* Wake up the process, it can probably write more. */
2494                 if (!sk->dead) sk->write_space(sk);
2495 
2496                 oskb = sk->send_head;
2497 
2498                 if (!(flag&2)) {
2499                   long m;
2500 
2501                   /* The following amusing code comes from Jacobson's
2502                    * article in SIGCOMM '88.  Note that rtt and mdev
2503                    * are scaled versions of rtt and mean deviation.
2504                    * This is designed to be as fast as possible.
2505                    * m stands for "measurement".
2506                    */
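                       /*
                        * With this scaling sk->rtt settles at about 8 times
                        * the smoothed RTT and sk->mdev at about 4 times the
                        * mean deviation, so the rto computed below,
                        * ((rtt >> 2) + mdev) >> 1, works out to roughly
                        * srtt + 2*mdev, clamped between 1 and 120 seconds.
                        */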
2507 
2508                   m = jiffies - oskb->when;  /* RTT */
2509                   m -= (sk->rtt >> 3);       /* m is now error in rtt est */
2510                   sk->rtt += m;              /* rtt = 7/8 rtt + 1/8 new */
2511                   if (m < 0)
2512                     m = -m;                  /* m is now abs(error) */
2513                   m -= (sk->mdev >> 2);      /* similar update on mdev */
2514                   sk->mdev += m;             /* mdev = 3/4 mdev + 1/4 new */
2515 
2516                   /* now update timeout.  Note that this removes any backoff */
2517                   sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2518                   if (sk->rto > 120*HZ)
2519                     sk->rto = 120*HZ;
2520                   if (sk->rto < 1*HZ)
2521                     sk->rto = 1*HZ;
2522                   sk->backoff = 0;
2523 
2524                 }
2525                 flag |= (2|4);
2526 
2527                 cli();
2528 
2529                 oskb = sk->send_head;
2530                 IS_SKB(oskb);
2531                 sk->send_head =(struct sk_buff *)oskb->link3;
2532                 if (sk->send_head == NULL) {
2533                         sk->send_tail = NULL;
2534                 }
2535 
2536                 /* We may need to remove this from the dev send list. */                
2537                 skb_unlink(oskb);       /* Much easier! */
2538                 sti();
2539                 oskb->magic = 0;
2540                 kfree_skb(oskb, FREE_WRITE); /* write. */
2541                 if (!sk->dead) sk->write_space(sk);
2542         } else {
2543                 break;
2544         }
2545   }
2546 
2547   /*
2548    * Maybe we can take some stuff off of the write queue,
2549    * and put it onto the xmit queue.
2550    */
2551   if (sk->wfront != NULL) {
2552         if (after (sk->window_seq+1, sk->wfront->h.seq) &&
2553                 (sk->retransmits == 0 || 
2554                  sk->timeout != TIME_WRITE ||
2555                  before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
2556                 && sk->packets_out < sk->cong_window) {
2557                 flag |= 1;
2558                 tcp_write_xmit(sk);
2559         } else if (before(sk->window_seq, sk->wfront->h.seq) &&
2560                    sk->send_head == NULL &&
2561                    sk->ack_backlog == 0 &&
2562                    sk->state != TCP_TIME_WAIT) {
2563                 reset_timer(sk, TIME_PROBE0, sk->rto);
2564         }               
2565   } else {
2566         if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2567             sk->state != TCP_TIME_WAIT && !sk->keepopen) {
2568                 DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n")); 
2569                 if (!sk->dead) sk->write_space(sk);
2570 
2571                 if (sk->keepopen)
2572                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2573                 else
2574                         delete_timer(sk);
2575         } else {
2576                 if (sk->state != (unsigned char) sk->keepopen) {
2577                         reset_timer(sk, TIME_WRITE, sk->rto);
2578                 }
2579                 if (sk->state == TCP_TIME_WAIT) {
2580                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2581                 }
2582         }
2583   }
2584 
2585   if (sk->packets_out == 0 && sk->partial != NULL &&
2586       sk->wfront == NULL && sk->send_head == NULL) {
2587         flag |= 1;
2588         tcp_send_partial(sk);
2589   }
2590 
2591   /* See if we are done. */
2592   if (sk->state == TCP_TIME_WAIT) {
2593         if (!sk->dead)
2594                 sk->state_change(sk);
2595         if (sk->rcv_ack_seq == sk->send_seq && sk->acked_seq == sk->fin_seq) {
2596                 flag |= 1;
2597                 sk->state = TCP_CLOSE;
2598                 sk->shutdown = SHUTDOWN_MASK;
2599         }
2600   }
2601 
2602   if (sk->state == TCP_LAST_ACK || sk->state == TCP_FIN_WAIT2) {
2603         if (!sk->dead) sk->state_change(sk);
2604         if (sk->rcv_ack_seq == sk->send_seq) {
2605                 flag |= 1;
2606                 if (sk->acked_seq != sk->fin_seq) {
2607                         tcp_time_wait(sk);
2608                 } else {
2609                         DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk));
2610                         tcp_send_ack(sk->send_seq, sk->acked_seq, sk,
2611                                      th, sk->daddr);
2612                         sk->shutdown = SHUTDOWN_MASK;
2613                         sk->state = TCP_CLOSE;
2614                 }
2615         }
2616   }
2617 
2618 /*
2619  * I make no guarantees about the first clause in the following
2620  * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2621  * what conditions "!flag" would be true.  However I think the rest
2622  * of the conditions would prevent that from causing any
2623  * unnecessary retransmission. 
2624  *   Clearly if the first packet has expired it should be 
2625  * retransmitted.  The other alternative, "flag&2 && retransmits", is
2626  * harder to explain:  You have to look carefully at how and when the
2627  * timer is set and with what timeout.  The most recent transmission always
2628  * sets the timer.  So in general if the most recent thing has timed
2629  * out, everything before it has as well.  So we want to go ahead and
2630  * retransmit some more.  If we didn't explicitly test for this
2631  * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2632  * would not be true.  If you look at the pattern of timing, you can
2633  * show that rto is increased fast enough that the next packet would
2634  * almost never be retransmitted immediately.  Then you'd end up
2635  * waiting for a timeout to send each packet on the retransmission
2636  * queue.  With my implementation of the Karn sampling algorithm,
2637  * the timeout would double each time.  The net result is that it would
2638  * take a hideous amount of time to recover from a single dropped packet.
2639  * It's possible that there should also be a test for TIME_WRITE, but
2640  * I think as long as "send_head != NULL" and "retransmit" is on, we've
2641  * got to be in real retransmission mode.
2642  *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2643  * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2644  * As long as no further losses occur, this seems reasonable.
2645  */
2646 
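       /*
        * In short: with unacked data still on send_head and flag either
        * zero or containing bit 4, retransmit the whole queue when either
        * this ack removed retransmitted data while retransmits is still
        * set, or the oldest outstanding segment has outlived its rto.
        */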
2647   if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2648       (((flag&2) && sk->retransmits) ||
2649        (sk->send_head->when + sk->rto < jiffies))) {
2650         ip_do_retransmit(sk, 1);
2651         reset_timer(sk, TIME_WRITE, sk->rto);
2652       }
2653 
2654   DPRINTF((DBG_TCP, "leaving tcp_ack\n"));
2655   return(1);
2656 }
2657 
2658 
2659 /*
2660  * This routine handles the data.  If there is room in the buffer,
2661  * it will have already been moved into it.  If there is no
2662  * room, then we will just have to discard the packet.
2663  */
2664 static int
2665 tcp_data(struct sk_buff *skb, struct sock *sk, 
2666          unsigned long saddr, unsigned short len)
2667 {
2668   struct sk_buff *skb1, *skb2;
2669   struct tcphdr *th;
2670   int dup_dumped=0;
2671 
2672   th = skb->h.th;
2673   print_th(th);
2674   skb->len = len -(th->doff*4);
2675 
2676   DPRINTF((DBG_TCP, "tcp_data len = %d sk = %X:\n", skb->len, sk));
2677 
2678   sk->bytes_rcv += skb->len;
2679   if (skb->len == 0 && !th->fin && !th->urg && !th->psh) {
2680         /* Don't want to keep passing ack's back and forth. */
2681         if (!th->ack) tcp_send_ack(sk->send_seq, sk->acked_seq,sk, th, saddr);
2682         kfree_skb(skb, FREE_READ);
2683         return(0);
2684   }
2685 
2686   if (sk->shutdown & RCV_SHUTDOWN) {
2687         sk->acked_seq = th->seq + skb->len + th->syn + th->fin;
2688         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2689         sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2690         sk->state = TCP_CLOSE;
2691         sk->err = EPIPE;
2692         sk->shutdown = SHUTDOWN_MASK;
2693         DPRINTF((DBG_TCP, "tcp_data: closing socket - %X\n", sk));
2694         kfree_skb(skb, FREE_READ);
2695         if (!sk->dead) sk->state_change(sk);
2696         return(0);
2697   }
2698 
2699   /*
2700    * Now we have to walk the chain, and figure out where this one
2701    * goes into it.  This is set up so that the last packet we received
2702    * will be the first one we look at, that way if everything comes
2703    * in order, there will be no performance loss, and if they come
2704    * out of order we will be able to fit things in nicely.
2705    */
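       /*
        * The scan below walks sk->rqueue backwards from the newest
        * segment: an exact duplicate that the new skb covers is replaced,
        * otherwise the new skb is appended after the first segment found
        * whose sequence does not exceed its own, and if none is found it
        * becomes the new queue head.
        */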
2706 
2707   /* This should start at the last one, and then go around forwards. */
2708   if (sk->rqueue == NULL) {
2709         DPRINTF((DBG_TCP, "tcp_data: skb = %X:\n", skb));
2710 #ifdef OLDWAY
2711         sk->rqueue = skb;
2712         skb->next = skb;
2713         skb->prev = skb;
2714         skb->list = &sk->rqueue;
2715 #else
2716         skb_queue_head(&sk->rqueue,skb);
2717 #endif          
2718         skb1= NULL;
2719   } else {
2720         DPRINTF((DBG_TCP, "tcp_data adding to chain sk = %X:\n", sk));
2721         for(skb1=sk->rqueue->prev; ; skb1 =(struct sk_buff *)skb1->prev) {
2722                 if(sk->debug)
2723                 {
2724                         printk("skb1=%p :", skb1);
2725                         printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
2726                         printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
2727                         printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
2728                                         sk->acked_seq);
2729                 }
2730 #ifdef OLD              
2731                 if (after(th->seq+1, skb1->h.th->seq)) {
2732                         skb->prev = skb1;
2733                         skb->next = skb1->next;
2734                         skb->next->prev = skb;
2735                         skb1->next = skb;
2736                         if (skb1 == sk->rqueue) sk->rqueue = skb;
2737                         break;
2738                 }
2739                 if (skb1->prev == sk->rqueue) {
2740                         skb->next= skb1;
2741                         skb->prev = skb1->prev;
2742                         skb->prev->next = skb;
2743                         skb1->prev = skb;
2744                         skb1 = NULL; /* so we know we might be able
2745                                         to ack stuff. */
2746                         break;
2747                 }
2748 #else
2749                 if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
2750                 {
2751                         skb_append(skb1,skb);
2752                         skb_unlink(skb1);
2753                         kfree_skb(skb1,FREE_READ);
2754                         dup_dumped=1;
2755                         skb1=NULL;
2756                         break;
2757                 }
2758                 if (after(th->seq+1, skb1->h.th->seq))
2759                 {
2760                         skb_append(skb1,skb);
2761                         break;
2762                 }
2763                 if (skb1 == sk->rqueue)
2764                 {
2765                         skb_queue_head(&sk->rqueue, skb);               
2766                         break;
2767                 }
2768 #endif          
2769         }
2770         DPRINTF((DBG_TCP, "skb = %X:\n", skb));
2771   }
2772 
2773   th->ack_seq = th->seq + skb->len;
2774   if (th->syn) th->ack_seq++;
2775   if (th->fin) th->ack_seq++;
2776 
2777   if (before(sk->acked_seq, sk->copied_seq)) {
2778         printk("*** tcp.c:tcp_data bug acked < copied\n");
2779         sk->acked_seq = sk->copied_seq;
2780   }
2781 
2782   /* Now figure out if we can ack anything. */
2783   if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) {
2784       if (before(th->seq, sk->acked_seq+1)) {
2785                 if (after(th->ack_seq, sk->acked_seq))
2786                                         sk->acked_seq = th->ack_seq;
2787                 skb->acked = 1;
2788 
2789                 /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */
2790                 if (skb->h.th->fin) {
2791                         if (!sk->dead) sk->state_change(sk);
2792                         sk->shutdown |= RCV_SHUTDOWN;
2793                 }
2794           
2795                 for(skb2 = (struct sk_buff *)skb->next;
2796                     skb2 !=(struct sk_buff *) sk->rqueue;
2797                     skb2 = (struct sk_buff *)skb2->next) {
2798                         if (before(skb2->h.th->seq, sk->acked_seq+1)) {
2799                                 if (after(skb2->h.th->ack_seq, sk->acked_seq))
2800                                 {
2801                                         long old_acked_seq = sk->acked_seq;
2802                                         sk->acked_seq = skb2->h.th->ack_seq;
2803                                         if((int)(sk->acked_seq - old_acked_seq) >0)
2804                                         {
2805                                                 int new_window=sk->window-sk->acked_seq+
2806                                                         old_acked_seq;
2807                                                 if(new_window<0)
2808                                                         new_window=0;
2809                                                 sk->window = new_window;
2810                                         }
2811                                 }
2812                                 skb2->acked = 1;
2813 
2814                                 /*
2815                                  * When we ack the fin, we turn on
2816                                  * the RCV_SHUTDOWN flag.
2817                                  */
2818                                 if (skb2->h.th->fin) {
2819                                         sk->shutdown |= RCV_SHUTDOWN;
2820                                         if (!sk->dead) sk->state_change(sk);
2821                                 }
2822 
2823                                 /* Force an immediate ack. */
2824                                 sk->ack_backlog = sk->max_ack_backlog;
2825                         } else {
2826                                 break;
2827                         }
2828                 }
2829 
2830                 /*
2831                  * This also takes care of updating the window.
2832                  * This if statement needs to be simplified.
2833                  */
2834                 if (!sk->delay_acks ||
2835                     sk->ack_backlog >= sk->max_ack_backlog || 
2836                     sk->bytes_rcv > sk->max_unacked || th->fin) {
2837 /*                      tcp_send_ack(sk->send_seq, sk->acked_seq,sk,th, saddr); */
2838                 } else {
2839                         sk->ack_backlog++;
2840                         if(sk->debug)
2841                                 printk("Ack queued.\n");
2842                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2843                 }
2844         }
2845   }
2846 
2847   /*
2848    * If we've missed a packet, send an ack.
2849    * Also start a timer to send another.
2850    */
2851   if (!skb->acked) {
2852         /*
2853          * This is important.  If we don't have much room left,
2854          * we need to throw out a few packets so we have a good
2855          * window.  Note that mtu is used, not mss, because mss is really
2856          * for the send side.  He could be sending us stuff as large as mtu.
2857          */
2858         while (sk->prot->rspace(sk) < sk->mtu) {
2859                 skb1 = skb_peek(&sk->rqueue);
2860                 if (skb1 == NULL) {
2861                         printk("INET: tcp.c:tcp_data memory leak detected.\n");
2862                         break;
2863                 }
2864 
2865                 /* Don't throw out something that has been acked. */
2866                 if (skb1->acked) {
2867                         break;
2868                 }
2869                 
2870                 skb_unlink(skb1);
2871 #ifdef OLDWAY           
2872                 if (skb1->prev == skb1) {
2873                         sk->rqueue = NULL;
2874                 } else {
2875                         sk->rqueue = (struct sk_buff *)skb1->prev;
2876                         skb1->next->prev = skb1->prev;
2877                         skb1->prev->next = skb1->next;
2878                 }
2879 #endif          
2880                 kfree_skb(skb1, FREE_READ);
2881         }
2882         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
2883         sk->ack_backlog++;
2884         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2885   } else {
2886         /* The data was in sequence; send an ack for what we have accepted. */
2887         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
2888   }
2889 
2890   /* Now tell the user we may have some data. */
2891   if (!sk->dead) {
2892         if(sk->debug)
2893                 printk("Data wakeup.\n");
2894         sk->data_ready(sk,0);
2895   } else {
2896         DPRINTF((DBG_TCP, "data received on dead socket.\n"));
2897   }
2898 
2899   if (sk->state == TCP_FIN_WAIT2 &&
2900       sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->send_seq) {
2901         DPRINTF((DBG_TCP, "tcp_data: entering last_ack state sk = %X\n", sk));
2902 
2903 /*      tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr); */
2904         sk->shutdown = SHUTDOWN_MASK;
2905         sk->state = TCP_LAST_ACK;
2906         if (!sk->dead) sk->state_change(sk);
2907   }
2908 
2909   return(0);
2910 }
2911 
2912 
2913 static int
2914 tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
2915 {
2916   extern int kill_pg(int pg, int sig, int priv);
2917   extern int kill_proc(int pid, int sig, int priv);
2918     
2919   if (!sk->dead) 
2920         sk->data_ready(sk,0);
2921     
2922   if (sk->urginline) {
2923         th->urg = 0;
2924         th->psh = 1;
2925         return(0);
2926   }
2927 
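  /*
   * sk->proc > 0 names a process to signal with SIGURG; a negative
   * value names (minus) a process group.
   */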
2928   if (!sk->urg) {
2929         /* So if we get more urgent data, we don't signal the user again. */
2930         if (sk->proc != 0) {
2931                 if (sk->proc > 0) {
2932                         kill_proc(sk->proc, SIGURG, 1);
2933                 } else {
2934                         kill_pg(-sk->proc, SIGURG, 1);
2935                 }
2936         }
2937   }
2938   sk->urg++;
2939   return(0);
2940 }
2941 
2942 
2943 /* This deals with incoming fins. 'Linus at 9 O'clock' 8-) */
2944 static int
2945 tcp_fin(struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
2946          unsigned long saddr, struct device *dev)
2947 {
2948   DPRINTF((DBG_TCP, "tcp_fin(sk=%X, th=%X, saddr=%X, dev=%X)\n",
2949                                                 sk, th, saddr, dev));
2950   
2951   if (!sk->dead) {
2952         sk->state_change(sk);
2953   }
2954 
2955   switch(sk->state) {
2956         case TCP_SYN_RECV:
2957         case TCP_SYN_SENT:
2958         case TCP_ESTABLISHED:
2959                 /* Contains the one that needs to be acked */
2960                 sk->fin_seq = th->seq+1;
2961                 sk->state = TCP_CLOSE_WAIT;
2962                 if (th->rst) sk->shutdown = SHUTDOWN_MASK;
2963                 break;
2964 
2965         case TCP_CLOSE_WAIT:
2966         case TCP_FIN_WAIT2:
2967                 break; /* we got a retransmit of the fin. */
2968 
2969         case TCP_FIN_WAIT1:
2970                 /* Contains the one that needs to be acked */
2971                 sk->fin_seq = th->seq+1;
2972                 sk->state = TCP_FIN_WAIT2;
2973                 break;
2974 
2975         default:
2976         case TCP_TIME_WAIT:
2977                 sk->state = TCP_LAST_ACK;
2978 
2979                 /* Start the timers. */
2980                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2981                 return(0);
2982   }
2983   sk->ack_backlog++;
2984 
2985   return(0);
2986 }
2987 
2988 
2989 /* This will accept the next outstanding connection. */
2990 static struct sock *
2991 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
2992 {
2993   struct sock *newsk;
2994   struct sk_buff *skb;
2995   
2996   DPRINTF((DBG_TCP, "tcp_accept(sk=%X, flags=%X, addr=%s)\n",
2997                                 sk, flags, in_ntoa(sk->saddr)));
2998 
2999   /*
3000    * We need to make sure that this socket is listening,
3001    * and that it has something pending.
3002    */
3003   if (sk->state != TCP_LISTEN) {
3004         sk->err = EINVAL;
3005         return(NULL); 
3006   }
3007 
3008   /* avoid the race. */
3009   cli();
3010   sk->inuse = 1;
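  /*
   * Sleep until a connection request has been queued; get_firstr()
   * returns its buffer, and skb->sk is the newly created socket.
   */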
3011   while((skb = get_firstr(sk)) == NULL) {
3012         if (flags & O_NONBLOCK) {
3013                 sti();
3014                 release_sock(sk);
3015                 sk->err = EAGAIN;
3016                 return(NULL);
3017         }
3018 
3019         release_sock(sk);
3020         interruptible_sleep_on(sk->sleep);
3021         if (current->signal & ~current->blocked) {
3022                 sti();
3023                 sk->err = ERESTARTSYS;
3024                 return(NULL);
3025         }
3026         sk->inuse = 1;
3027   }
3028   sti();
3029 
3030   /* Now all we need to do is return skb->sk. */
3031   newsk = skb->sk;
3032 
3033   kfree_skb(skb, FREE_READ);
3034   sk->ack_backlog--;
3035   release_sock(sk);
3036   return(newsk);
3037 }
3038 
3039 
3040 /* This will initiate an outgoing connection. */
3041 static int
3042 tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3043 {
3044   struct sk_buff *buff;
3045   struct sockaddr_in sin;
3046   struct device *dev=NULL;
3047   unsigned char *ptr;
3048   int tmp;
3049   struct tcphdr *t1;
3050   int err;
3051 
3052   if (sk->state != TCP_CLOSE) return(-EISCONN);
3053   if (addr_len < 8) return(-EINVAL);
3054 
3055   err=verify_area(VERIFY_READ, usin, addr_len);
3056   if(err)
3057         return err;
3058         
3059   memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len));
3060 
3061   if (sin.sin_family && sin.sin_family != AF_INET) return(-EAFNOSUPPORT);
3062 
3063   DPRINTF((DBG_TCP, "TCP connect daddr=%s\n", in_ntoa(sin.sin_addr.s_addr)));
3064   
3065   /* Don't want a TCP connection going to a broadcast address */
3066   if (chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) { 
3067         DPRINTF((DBG_TCP, "TCP connection to broadcast address not allowed\n"));
3068         return(-ENETUNREACH);
3069   }
3070   
3071   /* Connect back to the same socket: Blows up so disallow it */
3072   if(sk->saddr == sin.sin_addr.s_addr && sk->num==ntohs(sin.sin_port))
3073         return -EBUSY;
3074 
3075   sk->inuse = 1;
3076   sk->daddr = sin.sin_addr.s_addr;
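  /*
   * Pick a clock-driven initial send sequence number (in the spirit of
   * RFC 793) and set rcv_ack_seq just behind it so nothing appears
   * acked yet.
   */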
3077   sk->send_seq = jiffies * SEQ_TICK - seq_offset;
3078   sk->window_seq = sk->send_seq;
3079   sk->rcv_ack_seq = sk->send_seq -1;
3080   sk->err = 0;
3081   sk->dummy_th.dest = sin.sin_port;
3082   release_sock(sk);
3083 
3084   buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3085   if (buff == NULL) {
3086         return(-ENOMEM);
3087   }
3088   sk->inuse = 1;
3089   buff->mem_addr = buff;
3090   buff->mem_len = MAX_SYN_SIZE;
3091   buff->len = 24;
3092   buff->sk = sk;
3093   buff->free = 1;
3094   t1 = (struct tcphdr *) buff->data;
3095 
3096   /* Put in the IP header and routing stuff. */
3097   /* We need to build the routing stuff from the things saved in skb. */
3098   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3099                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3100   if (tmp < 0) {
3101         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3102         release_sock(sk);
3103         return(-ENETUNREACH);
3104   }
3105   buff->len += tmp;
3106   t1 = (struct tcphdr *)((char *)t1 +tmp);
3107 
3108   memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3109   t1->seq = ntohl(sk->send_seq++);
3110   buff->h.seq = sk->send_seq;
3111   t1->ack = 0;
3112   t1->window = 2;
3113   t1->res1=0;
3114   t1->res2=0;
3115   t1->rst = 0;
3116   t1->urg = 0;
3117   t1->psh = 0;
3118   t1->syn = 1;
3119   t1->urg_ptr = 0;
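  /* doff of 6 long words: the 20 byte header plus 4 bytes of MSS option. */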
3120   t1->doff = 6;
3121 
3122 /* Use the MSS the user asked for, otherwise pick a default below. */
3123   if (sk->user_mss)
3124     sk->mtu = sk->user_mss;
3125   else {
3126 #ifdef SUBNETSARELOCAL
3127     if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3128 #else
3129     if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3130 #endif
3131       sk->mtu = 576 - HEADER_SIZE;
3132     else
3133       sk->mtu = MAX_WINDOW;
3134   }
3135 /* but not bigger than device MTU */
3136   sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3137 
3138   /* Put in the TCP options to say MTU. */
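  /*
   * This is the standard 4-byte MSS option: kind 2, length 4, then the
   * value in network byte order; an MSS of 536, for example, is encoded
   * as the bytes 02 04 02 18.
   */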
3139   ptr = (unsigned char *)(t1+1);
3140   ptr[0] = 2;
3141   ptr[1] = 4;
3142   ptr[2] = (sk->mtu) >> 8;
3143   ptr[3] = (sk->mtu) & 0xff;
3144   tcp_send_check(t1, sk->saddr, sk->daddr,
3145                   sizeof(struct tcphdr) + 4, sk);
3146 
3147   /* This must go first otherwise a really quick response will get reset. */
3148   sk->state = TCP_SYN_SENT;
3149   sk->rtt = TCP_CONNECT_TIME;
3150   reset_timer(sk, TIME_WRITE, TCP_CONNECT_TIME);        /* Timer for repeating the SYN until an answer */
3151   sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3152 
3153   sk->prot->queue_xmit(sk, dev, buff, 0);  
3154   
3155   release_sock(sk);
3156   return(0);
3157 }
3158 
3159 
3160 /* This function checks to see if the tcp header is actually acceptable. */
3161 static int
3162 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3163              struct options *opt, unsigned long saddr, struct device *dev)
3164 {
3165   /*
3166    * This isn't quite right.  sk->acked_seq could be more recent
3167    * than sk->window.  This is however close enough.  We will accept
3168    * slightly more packets than we should, but it should not cause
3169    * problems unless someone is trying to forge packets.
3170    */
3171   DPRINTF((DBG_TCP, "tcp_sequence(sk=%X, th=%X, len = %d, opt=%d, saddr=%X)\n",
3172           sk, th, len, opt, saddr));
3173 
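  /*
   * Accept the segment if its first byte falls inside the window
   * [acked_seq, acked_seq + window], if its last byte does, or if it
   * straddles the whole window (starts before it and ends beyond it).
   */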
3174   if (between(th->seq, sk->acked_seq, sk->acked_seq + sk->window)||
3175       between(th->seq + len-(th->doff*4), sk->acked_seq + 1,
3176               sk->acked_seq + sk->window) ||
3177      (before(th->seq, sk->acked_seq) &&
3178        after(th->seq + len -(th->doff*4), sk->acked_seq + sk->window))) {
3179        return(1);
3180    }
3181   DPRINTF((DBG_TCP, "tcp_sequence: rejecting packet.\n"));
3182 
3183   /*
3184    *    Send a reset if we get something not ours and we are
3185    *    unsynchronized. Note: We don't do anything to our end. We
3186    *    are just killing the bogus remote connection; then we will
3187    *    connect again and it will work (with luck).
3188    */
3189          
3190   if(sk->state==TCP_SYN_SENT||sk->state==TCP_SYN_RECV)
3191   {
3192         tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3193         return(1);
3194   }
3195 
3196   /*
3197    * If it's too far ahead, send an ack to let the
3198    * other end know what we expect.
3199    */
3200   if (after(th->seq, sk->acked_seq + sk->window)) {
3201         if(!th->rst)
3202                 tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
3203         return(0);
3204   }
3205 
3206 #ifdef undef
3207 /*
3208  * if we do this, we won't respond to keepalive packets, since those
3209  * are slightly out of window, and we would have to generate an ack;
3210  * even a late ack must not carry a sequence number less than
3211  * one we've seen before.  Berkeley doesn't seem to do this, but it's
3212  * always hard to be sure.
3213  */
3214   /* In case it's just a late ack, let it through. */
3215   if (th->ack && len == (th->doff * 4) &&
3216       after(th->seq, sk->acked_seq - 32767) &&
3217       !th->fin && !th->syn) return(1);
3218 #endif
3219 
3220   if (!th->rst) {
3221         /* Try to resync things. */
3222         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
3223   }
3224   return(0);
3225 }
3226 
3227 
3228 
3229 
3230 
3231 int
3232 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3233         unsigned long daddr, unsigned short len,
3234         unsigned long saddr, int redo, struct inet_protocol * protocol)
3235 {
3236   struct tcphdr *th;
3237   struct sock *sk;
3238 
3239   if (!skb) {
3240         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL\n"));
3241         return(0);
3242   }
3243 #if 0   /* FIXME: it's ok for protocol to be NULL */
3244   if (!protocol) {
3245         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv protocol = NULL\n"));
3246         return(0);
3247   }
3248 
3249   if (!opt) {   /* FIXME: it's ok for opt to be NULL */
3250         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv opt = NULL\n"));
3251   }
3252 #endif
3253   if (!dev) {
3254         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL\n"));
3255         return(0);
3256   }
3257   th = skb->h.th;
3258 
3259   /* Find the socket. */
3260   sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3261   DPRINTF((DBG_TCP, "<<\n"));
3262   DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X\n", len, redo, skb));
3263   
3264   /* If this socket has got a reset it's to all intents and purposes 
3265      really dead */
3266   if (sk!=NULL && sk->zapped)
3267         sk=NULL;
3268 
3269   if (sk) {
3270          DPRINTF((DBG_TCP, "sk = %X:\n", sk));
3271   }
3272 
3273   if (!redo) {
3274         if (tcp_check(th, len, saddr, daddr )) {
3275                 skb->sk = NULL;
3276                 DPRINTF((DBG_TCP, "packet dropped with bad checksum.\n"));
3277 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: bad checksum\n");
3278                 kfree_skb(skb,FREE_READ);
3279                 /*
3280                  * We don't release the socket because it was
3281                  * never marked in use.
3282                  */
3283                 return(0);
3284         }
3285 
3286         /* See if we know about the socket. */
3287         if (sk == NULL) {
3288                 if (!th->rst) 
3289                 {       
3290                         th->seq = ntohl(th->seq);
3291                         /* So reset is always called with th->seq in host order */
3292                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3293                 }
3294                 skb->sk = NULL;
3295                 kfree_skb(skb, FREE_READ);
3296                 return(0);
3297         }
3298 
3299         skb->len = len;
3300         skb->sk = sk;
3301         skb->acked = 0;
3302         skb->used = 0;
3303         skb->free = 0;
3304         skb->urg_used = 0;
3305         skb->saddr = daddr;
3306         skb->daddr = saddr;
3307 
3308         th->seq = ntohl(th->seq);
3309 
3310        /* We may need to add it to the backlog here. */
3311        cli();
3312        if (sk->inuse) {
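                /*
                 * The socket is busy, so park the buffer on the circular
                 * back_log list; release_sock() presumably replays it
                 * through tcp_rcv() later with redo set.
                 */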
3313                 if (sk->back_log == NULL) {
3314                         sk->back_log = skb;
3315                         skb->next = skb;
3316                         skb->prev = skb;
3317                 } else {
3318                         skb->next = sk->back_log;
3319                         skb->prev = sk->back_log->prev;
3320                         skb->prev->next = skb;
3321                         skb->next->prev = skb;
3322                 }
3323                 sti();
3324                 return(0);
3325         }
3326         sk->inuse = 1;
3327         sti();
3328   } else {
3329         if (!sk) {
3330                 DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1\n"));
3331                 return(0);
3332         }
3333   }
3334 
3335   if (!sk->prot) {
3336         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL \n"));
3337         return(0);
3338   }
3339 
3340   /* Charge the memory to the socket. */
3341   if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
3342         skb->sk = NULL;
3343         DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.\n"));
3344         kfree_skb(skb, FREE_READ);
3345         release_sock(sk);
3346         return(0);
3347   }
3348   sk->rmem_alloc += skb->mem_len;
3349 
3350   DPRINTF((DBG_TCP, "About to do switch.\n"));
3351 
3352   /* Now deal with it. */
3353   switch(sk->state) {
3354         /*
3355          * This should close the system down if it's waiting
3356          * for an ack that is never going to be sent.
3357          */
3358         case TCP_LAST_ACK:
3359                 if (th->rst) {
3360                         sk->zapped=1;
3361                         sk->err = ECONNRESET;
3362                         sk->state = TCP_CLOSE;
3363                         sk->shutdown = SHUTDOWN_MASK;
3364                         if (!sk->dead) {
3365                                 sk->state_change(sk);
3366                         }
3367                         kfree_skb(skb, FREE_READ);
3368                         release_sock(sk);
3369                         return(0);
3370                 }
3371 
3372         case TCP_ESTABLISHED:
3373         case TCP_CLOSE_WAIT:
3374         case TCP_FIN_WAIT1:
3375         case TCP_FIN_WAIT2:
3376         case TCP_TIME_WAIT:
3377                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3378 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: not in seq\n");
3379 #ifdef undef
3380 /* nice idea, but tcp_sequence already does this.  Maybe it shouldn't?? */
3381                         if(!th->rst)
3382                                 tcp_send_ack(sk->send_seq, sk->acked_seq, 
3383                                      sk, th, saddr);
3384 #endif
3385                         kfree_skb(skb, FREE_READ);
3386                         release_sock(sk);
3387                         return(0);
3388                 }
3389 
3390                 if (th->rst) {
3391                         sk->zapped=1;
3392                         /* This means the thing should really be closed. */
3393                         sk->err = ECONNRESET;
3394 
3395                         if (sk->state == TCP_CLOSE_WAIT) {
3396                                 sk->err = EPIPE;
3397                         }
3398 
3399                         /*
3400                          * A reset with a fin just means that
3401                          * the data was not all read.
3402                          */
3403                         sk->state = TCP_CLOSE;
3404                         sk->shutdown = SHUTDOWN_MASK;
3405                         if (!sk->dead) {
3406                                 sk->state_change(sk);
3407                         }
3408                         kfree_skb(skb, FREE_READ);
3409                         release_sock(sk);
3410                         return(0);
3411                 }
3412                 if (
3413 #if 0
3414                 if ((opt && (opt->security != 0 ||
3415                             opt->compartment != 0)) || 
3416 #endif
3417                                  th->syn) {
3418                         sk->err = ECONNRESET;
3419                         sk->state = TCP_CLOSE;
3420                         sk->shutdown = SHUTDOWN_MASK;
3421                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3422                         if (!sk->dead) {
3423                                 sk->state_change(sk);
3424                         }
3425                         kfree_skb(skb, FREE_READ);
3426                         release_sock(sk);
3427                         return(0);
3428                 }
3429                 if (th->ack) {
3430                         if (!tcp_ack(sk, th, saddr, len)) {
3431                                 kfree_skb(skb, FREE_READ);
3432                                 release_sock(sk);
3433                                 return(0);
3434                         }
3435                 }
3436                 if (th->urg) {
3437                         if (tcp_urg(sk, th, saddr)) {
3438                                 kfree_skb(skb, FREE_READ);
3439                                 release_sock(sk);
3440                                 return(0);
3441                         }
3442                 }
3443 
3444                 if (tcp_data(skb, sk, saddr, len)) {
3445                         kfree_skb(skb, FREE_READ);
3446                         release_sock(sk);
3447                         return(0);
3448                 }
3449 
3450                 /* Moved: you must do data then fin bit */
3451                 if (th->fin && tcp_fin(sk, th, saddr, dev)) {
3452                         kfree_skb(skb, FREE_READ);
3453                         release_sock(sk);
3454                         return(0);
3455                 }
3456 
3457                 release_sock(sk);
3458                 return(0);
3459 
3460         case TCP_CLOSE:
3461                 if (sk->dead || sk->daddr) {
3462                         DPRINTF((DBG_TCP, "packet received for closed,dead socket\n"));
3463                         kfree_skb(skb, FREE_READ);
3464                         release_sock(sk);
3465                         return(0);
3466                 }
3467 
3468                 if (!th->rst) {
3469                         if (!th->ack)
3470                                 th->ack_seq = 0;
3471                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3472                 }
3473                 kfree_skb(skb, FREE_READ);
3474                 release_sock(sk);
3475                 return(0);
3476 
3477         case TCP_LISTEN:
3478                 if (th->rst) {
3479                         kfree_skb(skb, FREE_READ);
3480                         release_sock(sk);
3481                         return(0);
3482                 }
3483                 if (th->ack) {
3484                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3485                         kfree_skb(skb, FREE_READ);
3486                         release_sock(sk);
3487                         return(0);
3488                 }
3489 
3490                 if (th->syn) {
3491 #if 0
3492                         if (opt->security != 0 || opt->compartment != 0) {
3493                                 tcp_reset(daddr, saddr, th, prot, opt,dev);
3494                                 release_sock(sk);
3495                                 return(0);
3496                         }
3497 #endif
3498 
3499                         /*
3500                          * Now we just put the whole thing including
3501                          * the header and saddr, and protocol pointer
3502                          * into the buffer.  We can't respond until the
3503                          * user tells us to accept the connection.
3504                          */
3505                         tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
3506                         release_sock(sk);
3507                         return(0);
3508                 }
3509 
3510                 kfree_skb(skb, FREE_READ);
3511                 release_sock(sk);
3512                 return(0);
3513 
3514         default:
3515                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3516                         kfree_skb(skb, FREE_READ);
3517                         release_sock(sk);
3518                         return(0);
3519                 }
3520 
3521         case TCP_SYN_SENT:
3522                 if (th->rst) {
3523                         sk->err = ECONNREFUSED;
3524                         sk->state = TCP_CLOSE;
3525                         sk->shutdown = SHUTDOWN_MASK;
3526                         sk->zapped = 1;
3527                         if (!sk->dead) {
3528                                 sk->state_change(sk);
3529                         }
3530                         kfree_skb(skb, FREE_READ);
3531                         release_sock(sk);
3532                         return(0);
3533                 }
3534 #if 0
3535                 if (opt->security != 0 || opt->compartment != 0) {
3536                         sk->err = ECONNRESET;
3537                         sk->state = TCP_CLOSE;
3538                         sk->shutdown = SHUTDOWN_MASK;
3539                         tcp_reset(daddr, saddr,  th, sk->prot, opt, dev);
3540                         if (!sk->dead) {
3541                                 wake_up_interruptible(sk->sleep);
3542                         }
3543                         kfree_skb(skb, FREE_READ);
3544                         release_sock(sk);
3545                         return(0);
3546                 }
3547 #endif
3548                 if (!th->ack) {
3549                         if (th->syn) {
3550                                 sk->state = TCP_SYN_RECV;
3551                         }
3552 
3553                         kfree_skb(skb, FREE_READ);
3554                         release_sock(sk);
3555                         return(0);
3556                 }
3557 
3558                 switch(sk->state) {
3559                         case TCP_SYN_SENT:
3560                                 if (!tcp_ack(sk, th, saddr, len)) {
3561                                         tcp_reset(daddr, saddr, th,
3562                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3563                                         kfree_skb(skb, FREE_READ);
3564                                         release_sock(sk);
3565                                         return(0);
3566                                 }
3567 
3568                                 /*
3569                                  * If the syn bit is also set, switch to
3570                                  * tcp_syn_recv, and then to established.
3571                                  */
3572                                 if (!th->syn) {
3573                                         kfree_skb(skb, FREE_READ);
3574                                         release_sock(sk);
3575                                         return(0);
3576                                 }
3577 
3578                                 /* Ack the syn and fall through. */
3579                                 sk->acked_seq = th->seq+1;
3580                                 sk->fin_seq = th->seq;
3581                                 tcp_send_ack(sk->send_seq, th->seq+1,
3582                                                         sk, th, sk->daddr);
3583         
3584                         case TCP_SYN_RECV:
3585                                 if (!tcp_ack(sk, th, saddr, len)) {
3586                                         tcp_reset(daddr, saddr, th,
3587                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
3588                                         kfree_skb(skb, FREE_READ);
3589                                         release_sock(sk);
3590                                         return(0);
3591                                 }
3592                                 sk->state = TCP_ESTABLISHED;
3593 
3594                                 /*
3595                                  * Now we need to finish filling out
3596                                  * some of the tcp header.
3597                                  */
3598                                 /* We need to check for mtu info. */
3599                                 tcp_options(sk, th);
3600                                 sk->dummy_th.dest = th->source;
3601                                 sk->copied_seq = sk->acked_seq-1;
3602                                 if (!sk->dead) {
3603                                         sk->state_change(sk);
3604                                 }
3605 
3606                                 /*
3607                                  * We've already processed his first
3608                                  * ack.  In just about all cases that
3609                                  * will have set max_window.  This is
3610                                  * to protect us against the possibility
3611                                  * that the initial window he sent was 0.
3612                                  * This must occur after tcp_options, which
3613                                  * sets sk->mtu.
3614                                  */
3615                                 if (sk->max_window == 0) {
3616                                   sk->max_window = 32;
3617                                   sk->mss = min(sk->max_window, sk->mtu);
3618                                 }
3619 
3620                                 /*
3621                                  * Now process the rest like we were
3622                                  * already in the established state.
3623                                  */
3624                                 if (th->urg) {
3625                                         if (tcp_urg(sk, th, saddr)) { 
3626                                                 kfree_skb(skb, FREE_READ);
3627                                                 release_sock(sk);
3628                                                 return(0);
3629                                         }
3630                                 }
3631                                 if (tcp_data(skb, sk, saddr, len))
3632                                         kfree_skb(skb, FREE_READ);
3633 
3634                                 if (th->fin) tcp_fin(sk, th, saddr, dev);
3635                                 release_sock(sk);
3636                                 return(0);
3637                 }
3638 
3639                 if (th->urg) {
3640                         if (tcp_urg(sk, th, saddr)) {
3641                                 kfree_skb(skb, FREE_READ);
3642                                 release_sock(sk);
3643                                 return(0);
3644                         }
3645                 }
3646 
3647                 if (tcp_data(skb, sk, saddr, len)) {
3648                         kfree_skb(skb, FREE_READ);
3649                         release_sock(sk);
3650                         return(0);
3651                 }
3652 
3653                 if (!th->fin) {
3654                         release_sock(sk);
3655                         return(0);
3656                 }
3657                 tcp_fin(sk, th, saddr, dev);
3658                 release_sock(sk);
3659                 return(0);
3660         }
3661 }
3662 
3663 
3664 /*
3665   * This routine sends a packet with an out of date sequence
3666   * number. It assumes the other end will try to ack it.
3667   */
3668 static void
3669 tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3670 {
3671   struct sk_buff *buff;
3672   struct tcphdr *t1;
3673   struct device *dev=NULL;
3674   int tmp;
3675 
3676   if (sk->zapped)
3677         return; /* After a valid reset we can send no more */
3678 
3679   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) return;
3680 
3681   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
3682   if (buff == NULL) return;
3683 
3684   buff->mem_addr = buff;
3685   buff->mem_len = MAX_ACK_SIZE;
3686   buff->len = sizeof(struct tcphdr);
3687   buff->free = 1;
3688   buff->sk = sk;
3689   DPRINTF((DBG_TCP, "in tcp_write_wakeup\n"));
3690   t1 = (struct tcphdr *) buff->data;
3691 
3692   /* Put in the IP header and routing stuff. */
3693   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3694                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
3695   if (tmp < 0) {
3696         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3697         return;
3698   }
3699 
3700   buff->len += tmp;
3701   t1 = (struct tcphdr *)((char *)t1 +tmp);
3702 
3703   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
3704 
3705   /*
3706    * Use a previous sequence.
3707    * This should cause the other end to send an ack.
3708    */
3709   t1->seq = ntohl(sk->send_seq-1);
3710   t1->ack = 1; 
3711   t1->res1= 0;
3712   t1->res2= 0;
3713   t1->rst = 0;
3714   t1->urg = 0;
3715   t1->psh = 0;
3716   t1->fin = 0;
3717   t1->syn = 0;
3718   t1->ack_seq = ntohl(sk->acked_seq);
3719   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3720   t1->doff = sizeof(*t1)/4;
3721   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
3722 
3723   /* Send it and free it.
3724    * This will prevent the timer from automatically being restarted.
3725   */
3726   sk->prot->queue_xmit(sk, dev, buff, 1);
3727 }
3728 
3729 /*
3730  * This routine probes a zero window.  It makes a copy of the first
3731  * packet in the write queue, but with just one byte of data.
3732  */
3733 void
3734 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3735 {
3736   unsigned char *raw;
3737   struct iphdr *iph;
3738   struct sk_buff *skb2, *skb;
3739   int len, hlen, data;
3740   struct tcphdr *t1;
3741   struct device *dev;
3742 
3743   if (sk->zapped)
3744         return; /* After a valid reset we can send no more */
3745 
3746   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT &&
3747       sk -> state != TCP_FIN_WAIT1 && sk->state != TCP_FIN_WAIT2)
3748         return;
3749 
3750   skb = sk->wfront;
3751   if (skb == NULL)
3752         return;
3753 
3754   dev = skb->dev;
3755   /* This shouldn't be able to happen, but it does, so catch it. */
3756   if(dev==NULL)
3757     {
3758       printk("tcp_send_probe0: NULL device bug!\n");
3759       return;
3760     }
3761   IS_SKB(skb);
3762 
3763   raw = skb->data;
3764   iph = (struct iphdr *) (raw + dev->hard_header_len);
3765 
3766   hlen = (iph->ihl * sizeof(unsigned long)) + dev->hard_header_len;
3767   data = skb->len - hlen - sizeof(struct tcphdr);
3768   len = hlen + sizeof(struct tcphdr) + (data ? 1 : 0);
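  /*
   * The copy carries the link and IP headers, the TCP header, and at
   * most one byte of the original data.
   */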
3769         
3770   /* Allocate buffer. */
3771   if ((skb2 = alloc_skb(sizeof(struct sk_buff) + len, GFP_ATOMIC)) == NULL) {
3772 /*    printk("alloc failed raw %x th %x hlen %d data %d len %d\n",
3773            raw, skb->h.th, hlen, data, len); */
3774     reset_timer (sk, TIME_PROBE0, 10);  /* try again real soon */
3775     return;
3776   }
3777 
3778   skb2->arp = skb->arp;
3779   skb2->len = len;
3780   skb2->h.raw = (char *)(skb2->data);
3781  
3782   sk->wmem_alloc += skb2->mem_len;
3783  
3784   /* Copy the packet header into the new buffer. */
3785   memcpy(skb2->h.raw, raw, len);
3786  
3787   skb2->h.raw += hlen;  /* it's now h.th -- pointer to the tcp header */
3788   t1 = skb2->h.th;
3789  
3790 /* source, dest, seq, from existing packet */
3791   t1->ack_seq = ntohl(sk->acked_seq);
3792   t1->res1 = 0;
3793 /* doff and fin come from the existing packet.  Fin is safe because Linux
3794  * always sends the fin in a separate packet;
3795  * syn and rst had better be zero in the original */
3796   t1->ack = 1;
3797   t1->urg = 0;  /* urgent pointer might be beyond this fragment */
3798   t1->res2 = 0;
3799   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3800   t1->urg_ptr = 0;
3801   tcp_send_check(t1, sk->saddr, sk->daddr, len - hlen, sk);
3802   /* Send it and free it.
3803    * This will prevent the timer from automatically being restarted.
3804    */
3805   sk->prot->queue_xmit(sk, dev, skb2, 1);
3806   sk->backoff++;
3807   /*
3808    * in the case of retransmissions, there's good reason to limit
3809    * rto to 120 sec, as that's the maximum legal RTT on the Internet.
3810    * For probes it could reasonably be longer.  However making it
3811  * much longer could cause unacceptable delays in some situations,
3812  * so we might as well use the same value.
3813    */
3814   sk->rto = min(sk->rto << 1, 120*HZ);
3815   reset_timer (sk, TIME_PROBE0, sk->rto);
3816   sk->retransmits++;
3817   sk->prot->retransmits ++;
3818 }
3819 
3820 /*
3821  *      Socket option code for TCP. 
3822  */
3823   
3824 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3825 {
3826         int val,err;
3827 
3828         if(level!=SOL_TCP)
3829                 return ip_setsockopt(sk,level,optname,optval,optlen);
3830 
3831         if (optval == NULL) 
3832                 return(-EINVAL);
3833 
3834         err=verify_area(VERIFY_READ, optval, sizeof(int));
3835         if(err)
3836                 return err;
3837         
3838         val = get_fs_long((unsigned long *)optval);
3839 
3840         switch(optname)
3841         {
3842                 case TCP_MAXSEG:
3843 /*                      if(val<200||val>2048 || val>sk->mtu) */
3844 /*
3845  * Values greater than the interface MTU won't take effect.  However, at
3846  * the point when this call is made we typically don't yet know
3847  * which interface is going to be used.
3848  */
3849                         if(val<1||val>MAX_WINDOW)
3850                                 return -EINVAL;
3851                         sk->user_mss=val;
3852                         return 0;
3853                 case TCP_NODELAY:
3854                         sk->nonagle=(val==0)?0:1;
3855                         return 0;
3856                 default:
3857                         return(-ENOPROTOOPT);
3858         }
3859 }
3860 
3861 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3862 {
3863         int val,err;
3864 
3865         if(level!=SOL_TCP)
3866                 return ip_getsockopt(sk,level,optname,optval,optlen);
3867                         
3868         switch(optname)
3869         {
3870                 case TCP_MAXSEG:
3871                         val=sk->user_mss;
3872                         break;
3873                 case TCP_NODELAY:
3874                         val=sk->nonagle;        /* Until Johannes stuff is in */
3875                         break;
3876                 default:
3877                         return(-ENOPROTOOPT);
3878         }
3879         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3880         if(err)
3881                 return err;
3882         put_fs_long(sizeof(int),(unsigned long *) optlen);
3883 
3884         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3885         if(err)
3886                 return err;
3887         put_fs_long(val,(unsigned long *)optval);
3888 
3889         return(0);
3890 }       
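
/*
 * A minimal user level sketch (not part of this file) of how these options
 * are exercised, assuming the usual BSD socket headers; IPPROTO_TCP has the
 * same value as SOL_TCP, so both calls end up in tcp_setsockopt() above:
 *
 *      int fd = socket(AF_INET, SOCK_STREAM, 0);
 *      int one = 1, mss = 536;
 *
 *      setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
 *      setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,  &mss, sizeof(mss));
 */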
3891 
3892 
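/*
 * The protocol operations handed to the generic INET socket layer; the
 * initialisers are positional and follow the member order of struct proto.
 */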
3893 struct proto tcp_prot = {
3894   sock_wmalloc,
3895   sock_rmalloc,
3896   sock_wfree,
3897   sock_rfree,
3898   sock_rspace,
3899   sock_wspace,
3900   tcp_close,
3901   tcp_read,
3902   tcp_write,
3903   tcp_sendto,
3904   tcp_recvfrom,
3905   ip_build_header,
3906   tcp_connect,
3907   tcp_accept,
3908   ip_queue_xmit,
3909   tcp_retransmit,
3910   tcp_write_wakeup,
3911   tcp_read_wakeup,
3912   tcp_rcv,
3913   tcp_select,
3914   tcp_ioctl,
3915   NULL,
3916   tcp_shutdown,
3917   tcp_setsockopt,
3918   tcp_getsockopt,
3919   128,
3920   0,
3921   {NULL,},
3922   "TCP"
3923 };

/* [previous][next][first][last][top][bottom][index][help] */