root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. print_th
  3. get_firstr
  4. diff
  5. tcp_select_window
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. dequeue_partial
  15. enqueue_partial
  16. tcp_send_partial
  17. tcp_send_ack
  18. tcp_build_header
  19. tcp_write
  20. tcp_sendto
  21. tcp_read_wakeup
  22. cleanup_rbuf
  23. tcp_read_urg
  24. tcp_read
  25. tcp_shutdown
  26. tcp_recvfrom
  27. tcp_reset
  28. tcp_options
  29. tcp_conn_request
  30. tcp_close
  31. tcp_write_xmit
  32. sort_send
  33. tcp_ack
  34. tcp_data
  35. tcp_urg
  36. tcp_fin
  37. tcp_accept
  38. tcp_connect
  39. tcp_sequence
  40. tcp_rcv
  41. tcp_write_wakeup
  42. tcp_send_probe0
  43. tcp_setsockopt
  44. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *
  16  * Fixes:       
  17  *              Alan Cox        :       Numerous verify_area() calls
  18  *              Alan Cox        :       Set the ACK bit on a reset
  19  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  20  *                                      and was trying to connect (tcp_err()).
  21  *              Alan Cox        :       All icmp error handling was broken
  22  *                                      pointers passed were wrong and the
  23  *                                      socket was looked up backwards. Nobody
  24  *                                      tested any icmp error code obviously.
  25  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  26  *                                      on errors. select behaves and the icmp error race
  27  *                                      has gone by moving it into sock.c
  28  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  29  *                                      packets for unknown sockets.
  30  *              Alan Cox        :       tcp option processing.
  31  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  32  *              Herp Rosmanith  :       More reset fixes
  33  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  34  *                                      any kind of RST is right out.
  35  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  36  *                                      otherwise odd bits of prattle escape still
  37  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  38  *                                      LAN workplace lockups.
  39  *              Alan Cox        :       Some tidyups using the new skb list facilities
  40  *              Alan Cox        :       sk->keepopen now seems to work
  41  *              Alan Cox        :       Pulls options out correctly on accepts
  42  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  43  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  44  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  45  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  46  *              Alan Cox        :       Removed incorrect check for 20 * psh
  47  *      Michael O'Reilly        :       ack < copied bug fix.
  48  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  49  *              Alan Cox        :       FIN with no memory -> CRASH
  50  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  51  *              Alan Cox        :       Added TCP options (SOL_TCP)
  52  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  53  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  54  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  55  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  56  *              Alan Cox        :       Put in missing check for SYN bit.
  57  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  58  *                                      window non shrink trick.
  59  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  60  *              Charles Hedrick :       TCP fixes
  61  *              Toomas Tamm     :       TCP window fixes
  62  *
  63  *
  64  * To Fix:
  65  *                      Possibly a problem with accept(). BSD accept never fails after
  66  *              it causes a select. Linux can - given the official select semantics I
  67  *              feel that _really_ its the BSD network programs that are bust (notably
  68  *              inetd, which hangs occasionally because of this).
  69  *                      Add VJ Fastrecovery algorithm ?
  70  *                      Protocol closedown badly messed up.
  71  *                      Incompatibility with spider ports (tcp hangs on that 
  72  *                      socket occasionally).
  73  *              MSG_PEEK and read on same socket at once can cause crashes.
  74  *
  75  *              This program is free software; you can redistribute it and/or
  76  *              modify it under the terms of the GNU General Public License
  77  *              as published by the Free Software Foundation; either version
  78  *              2 of the License, or(at your option) any later version.
  79  */
  80 #include <linux/types.h>
  81 #include <linux/sched.h>
  82 #include <linux/mm.h>
  83 #include <linux/string.h>
  84 #include <linux/socket.h>
  85 #include <linux/sockios.h>
  86 #include <linux/termios.h>
  87 #include <linux/in.h>
  88 #include <linux/fcntl.h>
  89 #include "inet.h"
  90 #include "dev.h"
  91 #include "ip.h"
  92 #include "protocol.h"
  93 #include "icmp.h"
  94 #include "tcp.h"
  95 #include "skbuff.h"
  96 #include "sock.h"
  97 #include "arp.h"
  98 #include <linux/errno.h>
  99 #include <linux/timer.h>
 100 #include <asm/system.h>
 101 #include <asm/segment.h>
 102 #include <linux/mm.h>
 103 
 104 #define SEQ_TICK 3
 105 unsigned long seq_offset;
 106 
 107 static __inline__ int 
 108 min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 109 {
 110   if (a < b) return(a);
 111   return(b);
 112 }
 113 
 114 
 115 void
 116 print_th(struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
 117 {
 118   unsigned char *ptr;
 119 
 120   if (inet_debug != DBG_TCP) return;
 121 
 122   printk("TCP header:\n");
 123   ptr =(unsigned char *)(th + 1);
 124   printk("    source=%d, dest=%d, seq =%ld, ack_seq = %ld\n",
 125         ntohs(th->source), ntohs(th->dest),
 126         ntohl(th->seq), ntohl(th->ack_seq));
 127   printk("    fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n",
 128         th->fin, th->syn, th->rst, th->psh, th->ack,
 129         th->urg, th->res1, th->res2);
 130   printk("    window = %d, check = %d urg_ptr = %d\n",
 131         ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr));
 132   printk("    doff = %d\n", th->doff);
 133   printk("    options = %d %d %d %d\n", ptr[0], ptr[1], ptr[2], ptr[3]);
 134  }
 135 
 136 
 137 
 138 /* This routine grabs the first thing off of a rcv queue. */
 139 static struct sk_buff *
 140 get_firstr(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 141 {
 142   return skb_dequeue(&sk->rqueue);
 143 }
 144 
 145 /*
 146  *      Difference between two values in tcp ack terms.
 147  */
 148 
 149 static long
 150 diff(unsigned long seq1, unsigned long seq2)
     /* [previous][next][first][last][top][bottom][index][help] */
 151 {
 152   long d;
 153 
 154   d = seq1 - seq2;
 155   if (d > 0) return(d);
 156 
 157   /* I hope this returns what I want. */
 158   return(~d+1);
 159 }
 160 
 161 /* This routine picks a TCP windows for a socket based on
 162    the following constraints
 163    
 164    1. The window can never be shrunk once it is offered (RFC 793)
 165    2. We limit memory per socket
 166    
 167    For now we use NET2E3's heuristic of offering half the memory
 168    we have handy. All is not as bad as this seems however because
 169    of two things. Firstly we will bin packets even within the window
 170    in order to get the data we are waiting for into the memory limit.
 171    Secondly we bin common duplicate forms at receive time
 172 
 173    Better heuristics welcome
 174 */
 175    
 176 static int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 177 {
 178         int new_window = sk->prot->rspace(sk);
 179 
 180         /* Enforce RFC793 - we've offered it we must live with it */    
 181         if(new_window<sk->window)
 182                 return(sk->window);
 183         
 184         return(new_window);
 185 }
 186 
 187 /* Enter the time wait state. */
 188 
 189 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 190 {
 191   sk->state = TCP_TIME_WAIT;
 192   sk->shutdown = SHUTDOWN_MASK;
 193   if (!sk->dead)
 194         sk->state_change(sk);
 195   reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 196 }
 197 
 198 /*
 199  *      A timer event has trigger a tcp retransmit timeout. The
 200  *      socket xmit queue is ready and set up to send. Because
 201  *      the ack receive code keeps the queue straight we do
 202  *      nothing clever here.
 203  */
 204 
 205 static void
 206 tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 207 {
 208   if (all) {
 209         ip_retransmit(sk, all);
 210         return;
 211   }
 212 
 213 /*
 214  *  If we had the full V-J mechanism, this might be right.  But
 215  *  for the moment we want simple slow start after error.
 216  *
 217  *  if (sk->cong_window > 4)
 218  *       sk->cong_window = sk->cong_window / 2;
 219  */
 220  
 221   sk->cong_window = 1;
 222   sk->exp_growth = 0;
 223 
 224   /* Do the actuall retransmit. */
 225   ip_retransmit(sk, all);
 226 }
 227 
 228 
 229 /*
 230  * This routine is called by the ICMP module when it gets some
 231  * sort of error condition.  If err < 0 then the socket should
 232  * be closed and the error returned to the user.  If err > 0
 233  * it's just the icmp type << 8 | icmp code.  After adjustment
 234  * header points to the first 8 bytes of the tcp header.  We need
 235  * to find the appropriate port.
 236  */
 237 void
 238 tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 239         unsigned long saddr, struct inet_protocol *protocol)
 240 {
 241   struct tcphdr *th;
 242   struct sock *sk;
 243   struct iphdr *iph=(struct iphdr *)header;
 244   
 245   header+=4*iph->ihl;
 246    
 247   DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n",
 248                                         err, header, daddr, saddr, protocol));
 249 
 250   th =(struct tcphdr *)header;
 251   sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 252   print_th(th);
 253 
 254   if (sk == NULL) return;
 255   
 256   if(err<0)
 257   {
 258         sk->err = -err;
 259         sk->error_report(sk);
 260         return;
 261   }
 262 
 263   if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
 264         /*
 265          * FIXME:
 266          * For now we will just trigger a linear backoff.
 267          * The slow start code should cause a real backoff here.
 268          */
 269         if (sk->cong_window > 4) sk->cong_window--;
 270         return;
 271   }
 272 
 273   DPRINTF((DBG_TCP, "TCP: icmp_err got error\n"));
 274   sk->err = icmp_err_convert[err & 0xff].errno;
 275 
 276   /*
 277    * If we've already connected we will keep trying
 278    * until we time out, or the user gives up.
 279    */
 280   if (icmp_err_convert[err & 0xff].fatal) {
 281         if (sk->state == TCP_SYN_SENT) {
 282                 sk->state = TCP_CLOSE;
 283                 sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 284         }
 285   }
 286   return;
 287 }
 288 
 289 
 290 /*
 291  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 292  *      in the received data queue (ie a frame missing that needs sending to us)
 293  */
 294 
 295 static int
 296 tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 297 {
 298   unsigned long counted;
 299   unsigned long amount;
 300   struct sk_buff *skb;
 301   int count=0;
 302   int sum;
 303   unsigned long flags;
 304 
 305   DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk));
 306   if(sk && sk->debug)
 307         printk("tcp_readable: %p - ",sk);
 308 
 309   if (sk == NULL || skb_peek(&sk->rqueue) == NULL)      /* Empty sockets are easy! */
 310   {
 311         if(sk && sk->debug) 
 312                 printk("empty\n");
 313         return(0);
 314   }
 315   
 316   counted = sk->copied_seq+1;   /* Where we are at the moment */
 317   amount = 0;
 318   
 319   save_flags(flags);            /* So nobody adds things at the wrong moment */
 320   cli();
 321   skb =(struct sk_buff *)sk->rqueue;
 322 
 323   /* Do until a push or until we are out of data. */
 324   do {
 325         count++;
 326 #ifdef OLD      
 327         /* This is wrong: It breaks Chameleon amongst other stacks */
 328         if (count > 20) {
 329                 restore_flags(flags);
 330                 DPRINTF((DBG_TCP, "tcp_readable, more than 20 packets without a psh\n"));
 331                 printk("tcp_read: possible read_queue corruption.\n");
 332                 return(amount);
 333         }
 334 #endif  
 335         if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 336                 break;
 337         sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 338         if (skb->h.th->syn) sum++;
 339         if (skb->h.th->urg) {
 340                 sum -= ntohs(skb->h.th->urg_ptr);       /* Dont count urg data */
 341         }
 342         if (sum >= 0) {                                 /* Add it up, move on */
 343                 amount += sum;
 344                 if (skb->h.th->syn) amount--;
 345                 counted += sum;
 346         }
 347         if (amount && skb->h.th->psh) break;
 348         skb =(struct sk_buff *)skb->next;               /* Move along */
 349   } while(skb != sk->rqueue);
 350   restore_flags(flags);
 351   DPRINTF((DBG_TCP, "tcp readable returning %d bytes\n", amount));
 352   if(sk->debug)
 353         printk("got %lu bytes.\n",amount);
 354   return(amount);
 355 }
 356 
 357 
 358 /*
 359  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 360  *      listening socket has a receive queue of sockets to accept.
 361  */
 362 
 363 static int
 364 tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 365 {
 366   DPRINTF((DBG_TCP, "tcp_select(sk=%X, sel_type = %d, wait = %X)\n",
 367                                                 sk, sel_type, wait));
 368 
 369   sk->inuse = 1;
 370   switch(sel_type) {
 371         case SEL_IN:
 372                 if(sk->debug)
 373                         printk("select in");
 374                 select_wait(sk->sleep, wait);
 375                 if(sk->debug)
 376                         printk("-select out");
 377                 if (skb_peek(&sk->rqueue) != NULL) {
 378                         if (sk->state == TCP_LISTEN || tcp_readable(sk)) {
 379                                 release_sock(sk);
 380                                 if(sk->debug)
 381                                         printk("-select ok data\n");
 382                                 return(1);
 383                         }
 384                 }
 385                 if (sk->err != 0)       /* Receiver error */
 386                 {
 387                         release_sock(sk);
 388                         if(sk->debug)
 389                                 printk("-select ok error");
 390                         return(1);
 391                 }
 392                 if (sk->shutdown & RCV_SHUTDOWN) {
 393                         release_sock(sk);
 394                         if(sk->debug)
 395                                 printk("-select ok down\n");
 396                         return(1);
 397                 } else {
 398                         release_sock(sk);
 399                         if(sk->debug)
 400                                 printk("-select fail\n");
 401                         return(0);
 402                 }
 403         case SEL_OUT:
 404                 select_wait(sk->sleep, wait);
 405                 if (sk->shutdown & SEND_SHUTDOWN) {
 406                         DPRINTF((DBG_TCP,
 407                                 "write select on shutdown socket.\n"));
 408 
 409                         /* FIXME: should this return an error? */
 410                         release_sock(sk);
 411                         return(0);
 412                 }
 413 
 414                 /*
 415                  * FIXME:
 416                  * Hack so it will probably be able to write
 417                  * something if it says it's ok to write.
 418                  */
 419                 if (sk->prot->wspace(sk) >= sk->mtu) {
 420                         release_sock(sk);
 421                         /* This should cause connect to work ok. */
 422                         if (sk->state == TCP_SYN_RECV ||
 423                             sk->state == TCP_SYN_SENT) return(0);
 424                         return(1);
 425                 }
 426                 DPRINTF((DBG_TCP,
 427                         "tcp_select: sleeping on write sk->wmem_alloc = %d, "
 428                         "sk->packets_out = %d\n"
 429                         "sk->wback = %X, sk->wfront = %X\n"
 430                         "sk->send_seq = %u, sk->window_seq=%u\n", 
 431                                 sk->wmem_alloc, sk->packets_out,
 432                                 sk->wback, sk->wfront,
 433                                 sk->send_seq, sk->window_seq));
 434 
 435                 release_sock(sk);
 436                 return(0);
 437         case SEL_EX:
 438                 select_wait(sk->sleep,wait);
 439                 if (sk->err) {
 440                         release_sock(sk);
 441                         return(1);
 442                 }
 443                 release_sock(sk);
 444                 return(0);
 445   }
 446 
 447   release_sock(sk);
 448   return(0);
 449 }
 450 
 451 
 452 int
 453 tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 454 {
 455   int err;
 456   DPRINTF((DBG_TCP, "tcp_ioctl(sk=%X, cmd = %d, arg=%X)\n", sk, cmd, arg));
 457   switch(cmd) {
 458         case DDIOCSDBG:
 459                 return(dbg_ioctl((void *) arg, DBG_TCP));
 460 
 461         case TIOCINQ:
 462 #ifdef FIXME    /* FIXME: */
 463         case FIONREAD:
 464 #endif
 465                 {
 466                         unsigned long amount;
 467 
 468                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 469 
 470                         sk->inuse = 1;
 471                         amount = tcp_readable(sk);
 472                         release_sock(sk);
 473                         DPRINTF((DBG_TCP, "returning %d\n", amount));
 474                         err=verify_area(VERIFY_WRITE,(void *)arg,
 475                                                    sizeof(unsigned long));
 476                         if(err)
 477                                 return err;
 478                         put_fs_long(amount,(unsigned long *)arg);
 479                         return(0);
 480                 }
 481         case SIOCATMARK:
 482                 {
 483                         struct sk_buff *skb;
 484                         int answ = 0;
 485 
 486                         /*
 487                          * Try to figure out if we need to read
 488                          * some urgent data.
 489                          */
 490                         sk->inuse = 1;
 491                         if ((skb=skb_peek(&sk->rqueue)) != NULL) 
 492                         {
 493                                 if (sk->copied_seq+1 == skb->h.th->seq && skb->h.th->urg) 
 494                                                 answ = 1;
 495                         }
 496                         release_sock(sk);
 497                         err=verify_area(VERIFY_WRITE,(void *) arg,
 498                                                   sizeof(unsigned long));
 499                         if(err)
 500                                 return err;
 501                         put_fs_long(answ,(int *) arg);
 502                         return(0);
 503                 }
 504         case TIOCOUTQ:
 505                 {
 506                         unsigned long amount;
 507 
 508                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 509                         amount = sk->prot->wspace(sk);
 510                         err=verify_area(VERIFY_WRITE,(void *)arg,
 511                                                    sizeof(unsigned long));
 512                         if(err)
 513                                 return err;
 514                         put_fs_long(amount,(unsigned long *)arg);
 515                         return(0);
 516                 }
 517         default:
 518                 return(-EINVAL);
 519   }
 520 }
 521 
 522 
 523 /* This routine computes a TCP checksum. */
 524 unsigned short
 525 tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 526           unsigned long saddr, unsigned long daddr)
 527 {     
 528   unsigned long sum;
 529    
 530   if (saddr == 0) saddr = my_addr();
 531   print_th(th);
 532   __asm__("\t addl %%ecx,%%ebx\n"
 533           "\t adcl %%edx,%%ebx\n"
 534           "\t adcl $0, %%ebx\n"
 535           : "=b"(sum)
 536           : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 537           : "cx","bx","dx" );
 538    
 539   if (len > 3) {
 540         __asm__("\tclc\n"
 541                 "1:\n"
 542                 "\t lodsl\n"
 543                 "\t adcl %%eax, %%ebx\n"
 544                 "\t loop 1b\n"
 545                 "\t adcl $0, %%ebx\n"
 546                 : "=b"(sum) , "=S"(th)
 547                 : "0"(sum), "c"(len/4) ,"1"(th)
 548                 : "ax", "cx", "bx", "si" );
 549   }
 550    
 551   /* Convert from 32 bits to 16 bits. */
 552   __asm__("\t movl %%ebx, %%ecx\n"
 553           "\t shrl $16,%%ecx\n"
 554           "\t addw %%cx, %%bx\n"
 555           "\t adcw $0, %%bx\n"
 556           : "=b"(sum)
 557           : "0"(sum)
 558           : "bx", "cx");
 559    
 560   /* Check for an extra word. */
 561   if ((len & 2) != 0) {
 562         __asm__("\t lodsw\n"
 563                 "\t addw %%ax,%%bx\n"
 564                 "\t adcw $0, %%bx\n"
 565                 : "=b"(sum), "=S"(th)
 566                 : "0"(sum) ,"1"(th)
 567                 : "si", "ax", "bx");
 568   }
 569    
 570   /* Now check for the extra byte. */
 571   if ((len & 1) != 0) {
 572         __asm__("\t lodsb\n"
 573                 "\t movb $0,%%ah\n"
 574                 "\t addw %%ax,%%bx\n"
 575                 "\t adcw $0, %%bx\n"
 576                 : "=b"(sum)
 577                 : "0"(sum) ,"S"(th)
 578                 : "si", "ax", "bx");
 579   }
 580    
 581   /* We only want the bottom 16 bits, but we never cleared the top 16. */
 582   return((~sum) & 0xffff);
 583 }
 584 
 585 
 586 void
 587 tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 588                unsigned long daddr, int len, struct sock *sk)
 589 {
 590   th->check = 0;
 591   th->check = tcp_check(th, len, saddr, daddr);
 592   return;
 593 }
 594 
 595 static struct sk_buff * dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 596 {
 597         struct sk_buff * skb;
 598         unsigned long flags;
 599 
 600         save_flags(flags);
 601         cli();
 602         skb = sk->send_tmp;
 603         if (skb) {
 604                 sk->send_tmp = skb->next;
 605                 skb->next = NULL;
 606         }
 607         restore_flags(flags);
 608         return skb;
 609 }
 610 
 611 static void enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 612 {
 613         unsigned long flags;
 614 
 615         save_flags(flags);
 616         cli();
 617         skb->next = sk->send_tmp;
 618         sk->send_tmp = skb;
 619         restore_flags(flags);
 620 }
 621 
 622 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 623 {
 624   struct sk_buff *skb;
 625 
 626   if (sk == NULL)
 627         return;
 628   while ((skb = dequeue_partial(sk)) != NULL) {
 629   
 630         /* If we have queued a header size packet.. */
 631         if(skb->len-(unsigned long)skb->h.th + (unsigned long)skb->data == sizeof(struct tcphdr)) {
 632                 /* If its got a syn or fin its notionally included in the size..*/
 633                 if(!skb->h.th->syn && !skb->h.th->fin) {
 634                         printk("tcp_send_partial: attempt to queue a bogon.\n");
 635                         kfree_skb(skb,FREE_WRITE);
 636                         return;
 637                 }
 638         }
 639   
 640         /* We need to complete and send the packet. */
 641         tcp_send_check(skb->h.th, sk->saddr, sk->daddr,
 642                         skb->len-(unsigned long)skb->h.th +
 643                         (unsigned long)skb->data, sk);
 644 
 645         skb->h.seq = sk->send_seq;
 646         if (after(sk->send_seq , sk->window_seq) ||
 647             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 648              sk->packets_out >= sk->cong_window) {
 649                 DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
 650                                         sk->cong_window, sk->packets_out));
 651                 DPRINTF((DBG_TCP, "sk->send_seq = %d, sk->window_seq = %d\n",
 652                                         sk->send_seq, sk->window_seq));
 653                 skb->next = NULL;
 654                 skb->magic = TCP_WRITE_QUEUE_MAGIC;
 655                 if (sk->wback == NULL) {
 656                         sk->wfront=skb;
 657                 } else {
 658                         sk->wback->next = skb;
 659                 }
 660                 sk->wback = skb;
 661                 if (before(sk->window_seq, sk->wfront->h.seq) &&
 662                     sk->send_head == NULL &&
 663                     sk->ack_backlog == 0)
 664                   reset_timer(sk, TIME_PROBE0, 
 665                               backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
 666         } else {
 667                 sk->prot->queue_xmit(sk, skb->dev, skb,0);
 668         }
 669   }
 670 }
 671 
 672 
 673 /* This routine sends an ack and also updates the window. */
 674 static void
 675 tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 676              struct sock *sk,
 677              struct tcphdr *th, unsigned long daddr)
 678 {
 679   struct sk_buff *buff;
 680   struct tcphdr *t1;
 681   struct device *dev = NULL;
 682   int tmp;
 683 
 684   if(sk->zapped)
 685         return;         /* We have been reset, we may not send again */
 686   /*
 687    * We need to grab some memory, and put together an ack,
 688    * and then put it into the queue to be sent.
 689    */
 690   buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 691   if (buff == NULL) {
 692         /* Force it to send an ack. */
 693         sk->ack_backlog++;
 694         if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) {
 695                 reset_timer(sk, TIME_WRITE, 10);
 696         }
 697 if (inet_debug == DBG_SLIP) printk("\rtcp_ack: malloc failed\n");
 698         return;
 699   }
 700 
 701   buff->mem_addr = buff;
 702   buff->mem_len = MAX_ACK_SIZE;
 703   buff->len = sizeof(struct tcphdr);
 704   buff->sk = sk;
 705   t1 =(struct tcphdr *) buff->data;
 706 
 707   /* Put in the IP header and routing stuff. */
 708   tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 709                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 710   if (tmp < 0) {
 711         buff->free=1;
 712         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 713 if (inet_debug == DBG_SLIP) printk("\rtcp_ack: build_header failed\n");
 714         return;
 715   }
 716   buff->len += tmp;
 717   t1 =(struct tcphdr *)((char *)t1 +tmp);
 718 
 719   /* FIXME: */
 720   memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 721 
 722   /* swap the send and the receive. */
 723   t1->dest = th->source;
 724   t1->source = th->dest;
 725   t1->seq = ntohl(sequence);
 726   t1->ack = 1;
 727   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
 728   t1->window = ntohs(sk->window);
 729   t1->res1 = 0;
 730   t1->res2 = 0;
 731   t1->rst = 0;
 732   t1->urg = 0;
 733   t1->syn = 0;
 734   t1->psh = 0;
 735   t1->fin = 0;
 736   if (ack == sk->acked_seq) {
 737         sk->ack_backlog = 0;
 738         sk->bytes_rcv = 0;
 739         sk->ack_timed = 0;
 740         if (sk->send_head == NULL && sk->wfront == NULL && sk->timeout == TIME_WRITE) 
 741         {
 742                 if(sk->keepopen)
 743                         reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 744                 else
 745                         delete_timer(sk);
 746         }
 747   }
 748   t1->ack_seq = ntohl(ack);
 749   t1->doff = sizeof(*t1)/4;
 750   tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 751   if (sk->debug)
 752          printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 753   sk->prot->queue_xmit(sk, dev, buff, 1);
 754 }
 755 
 756 
 757 /* This routine builds a generic TCP header. */
 758 static int
 759 tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 760 {
 761 
 762   /* FIXME: want to get rid of this. */
 763   memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 764   th->seq = htonl(sk->send_seq);
 765   th->psh =(push == 0) ? 1 : 0;
 766   th->doff = sizeof(*th)/4;
 767   th->ack = 1;
 768   th->fin = 0;
 769   sk->ack_backlog = 0;
 770   sk->bytes_rcv = 0;
 771   sk->ack_timed = 0;
 772   th->ack_seq = htonl(sk->acked_seq);
 773   sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 774   th->window = htons(sk->window);
 775 
 776   return(sizeof(*th));
 777 }
 778 
 779 /*
 780  * This routine copies from a user buffer into a socket,
 781  * and starts the transmit system.
      *
      *   sk       - socket to send on.  It is claimed (sk->inuse = 1) while
      *              we work and handed back with release_sock() before every
      *              return and before every call that may sleep.
      *   from     - source of the data; read with memcpy_fromfs().
      *   len      - number of bytes the caller wants to send.
      *   nonblock - when non-zero, return instead of sleeping while the
      *              connection is still being set up or while no buffer
      *              memory is available.
      *   flags    - only MSG_OOB is examined here: it marks the segment
      *              urgent and forces a partial packet out.
      *
      * Returns the number of bytes accepted.  When nothing was accepted the
      * result is a negative errno: the pending sk->err, -EPIPE, -EAGAIN,
      * -ERESTARTSYS, or the error returned by build_header().
 782  */
 783 static int
 784 tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 785           int len, int nonblock, unsigned flags)
 786 {
 787   int copied = 0;
 788   int copy;
 789   int tmp;
 790   struct sk_buff *skb;
 791   struct sk_buff *send_tmp;
 792   unsigned char *buff;
 793   struct proto *prot;
 794   struct device *dev = NULL;
 795 
 796   DPRINTF((DBG_TCP, "tcp_write(sk=%X, from=%X, len=%d, nonblock=%d, flags=%X)\n",
 797                                         sk, from, len, nonblock, flags));
 798 
 799   sk->inuse=1;
 800   prot = sk->prot;
     /* Each pass of this loop queues at most one segment's worth of data. */
 801   while(len > 0) {
 802         if (sk->err) {                  /* Stop on an error */
 803                 release_sock(sk);
 804                 if (copied) return(copied);
 805                 tmp = -sk->err;
 806                 sk->err = 0;
 807                 return(tmp);
 808         }
 809 
 810         /* First make sure the sending side has not been shut down. */
 811         if (sk->shutdown & SEND_SHUTDOWN) {
 812                 release_sock(sk);
                     /*
                      * With data already copied the short count is returned
                      * and EPIPE stays in sk->err; otherwise the error is
                      * cleared again and -EPIPE is returned directly.
                      */
 813                 sk->err = EPIPE;
 814                 if (copied) return(copied);
 815                 sk->err = 0;
 816                 return(-EPIPE);
 817         }
 818 
 819 
 820         /* Wait for a connection to finish. */
 821         
 822         while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) {
 823                 if (sk->err) {
 824                         release_sock(sk);
 825                         if (copied) return(copied);
 826                         tmp = -sk->err;
 827                         sk->err = 0;
 828                         return(tmp);
 829                 }
 830 
                     /* Not in the middle of connecting either: give up. */
 831                 if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) {
 832                         release_sock(sk);
 833                         DPRINTF((DBG_TCP, "tcp_write: return 1\n"));
 834                         if (copied) return(copied);
 835 
 836                         if (sk->err) {
 837                                 tmp = -sk->err;
 838                                 sk->err = 0;
 839                                 return(tmp);
 840                         }
 841 
                             /* SIGPIPE is raised only when sk->keepopen is set. */
 842                         if (sk->keepopen) {
 843                                 send_sig(SIGPIPE, current, 0);
 844                         }
 845                         return(-EPIPE);
 846                 }
 847 
 848                 if (nonblock || copied) {
 849                         release_sock(sk);
 850                         DPRINTF((DBG_TCP, "tcp_write: return 2\n"));
 851                         if (copied) return(copied);
 852                         return(-EAGAIN);
 853                 }
 854 
                     /*
                      * Give the socket up and sleep.  The state is tested
                      * again with interrupts off, presumably so that a
                      * wakeup arriving between the test and the sleep is not
                      * lost.  On a signal we return without re-claiming the
                      * socket.
                      */
 855                 release_sock(sk);
 856                 cli();
 857                 if (sk->state != TCP_ESTABLISHED &&
 858                     sk->state != TCP_CLOSE_WAIT && sk->err == 0) {
 859                         interruptible_sleep_on(sk->sleep);
 860                         if (current->signal & ~current->blocked) {
 861                                 sti();
 862                                 DPRINTF((DBG_TCP, "tcp_write: return 3\n"));
 863                                 if (copied) return(copied);
 864                                 return(-ERESTARTSYS);
 865                         }
 866                 }
 867                 sk->inuse = 1;
 868                 sti();
 869         }
 870 
 871         /* Now we need to check if we have a half built packet. */
 872         if ((skb = dequeue_partial(sk)) != NULL) {
 873                 int hdrlen;
 874 
 875                  /* IP header + TCP header */
 876                 hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 877                          + sizeof(struct tcphdr);
 878 
 879                 /* If sk->mtu has been changed this could cause problems. */
 880 
 881                 /* Add more stuff to the end of skb->len */
 882                 if (!(flags & MSG_OOB)) {
                             /* Room left in the packet, limited to what the caller has. */
 883                         copy = min(sk->mtu - (skb->len - hdrlen), len);
 884                         /* FIXME: this is really a bug. */
 885                         if (copy <= 0) {
 886                           printk("TCP: **bug**: \"copy\" <= 0!!\n");
 887                           copy = 0;
 888                         }
 889           
 890                         memcpy_fromfs(skb->data + skb->len, from, copy);
 891                         skb->len += copy;
 892                         from += copy;
 893                         copied += copy;
 894                         len -= copy;
 895                         sk->send_seq += copy;
 896                       }
                     /*
                      * Put the packet back as the partial one, then send it
                      * if its payload has reached sk->mtu or the caller
                      * asked for MSG_OOB.
                      */
 897                 enqueue_partial(skb, sk);
 898                 if ((skb->len - hdrlen) >= sk->mtu || (flags & MSG_OOB)) {
 899                   tcp_send_partial(sk);
 900                 }
 901                 continue;
 902         }
 903 
 904         /*
 905          * We also need to worry about the window.
 906          * If window < 1/4 offered window, don't use it.  That's
 907          *   silly window prevention.  What we actually do is 
 908          *   use the whole MTU.  Since this results in the right
 909          *   edge of the packet being outside the window, it will
 910          *   be queued for later rather than sent.
 911          */
 912 
 913         copy = diff(sk->window_seq, sk->send_seq);
 914         if (copy < (diff(sk->window_seq, sk->rcv_ack_seq) >> 2))
 915           copy = sk->mtu;
 916         copy = min(copy, sk->mtu);
 917         copy = min(copy, len);
 918 
 919   /* We should really check the window here also. */
             /*
              * Packets are outstanding, this piece is smaller than sk->mtu
              * and it is not urgent: allocate room for a full-sized packet
              * and remember it in send_tmp, so that it is parked as the
              * partial packet below instead of being sent.  Otherwise
              * allocate just enough for this piece.
              */
 920         if (sk->packets_out && copy < sk->mtu && !(flags & MSG_OOB)) {
 921         /* We will release the socket in case we sleep here. */
 922           release_sock(sk);
 923           skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
 924           sk->inuse = 1;
 925           send_tmp = skb;
 926         } else {
 927                 /* We will release the socket in case we sleep here. */
 928           release_sock(sk);
 929           skb = prot->wmalloc(sk, copy + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
 930           sk->inuse = 1;
 931           send_tmp = NULL;
 932         }
 933 
 934         /* If we didn't get any memory, we need to sleep. */
 935         if (skb == NULL) {
 936                 if (nonblock /* || copied */) {
 937                         release_sock(sk);
 938                         DPRINTF((DBG_TCP, "tcp_write: return 4\n"));
 939                         if (copied) return(copied);
 940                         return(-EAGAIN);
 941                 }
 942 
 943                 /* FIXME: here is another race condition. */
 944                 tmp = sk->wmem_alloc;
 945                 release_sock(sk);
 946                 cli();
 947                 /* Again we will try to avoid it. */
                     /*
                      * Sleep only if sk->wmem_alloc has not dropped since it
                      * was sampled and the connection is still usable; then
                      * retry the whole pass.
                      */
 948                 if (tmp <= sk->wmem_alloc &&
 949                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
 950                                 && sk->err == 0) {
 951                         interruptible_sleep_on(sk->sleep);
 952                         if (current->signal & ~current->blocked) {
 953                                 sti();
 954                                 DPRINTF((DBG_TCP, "tcp_write: return 5\n"));
 955                                 if (copied) return(copied);
 956                                 return(-ERESTARTSYS);
 957                         }
 958                 }
 959                 sk->inuse = 1;
 960                 sti();
 961                 continue;
 962         }
 963 
 964         skb->len = 0;
 965         skb->sk = sk;
 966         skb->free = 0;
 967 
 968         buff = skb->data;
 969 
 970         /*
 971          * FIXME: we need to optimize this.
 972          * Perhaps some hints here would be good.
 973          */
             /* On success build_header() returns the length it wrote at skb->data. */
 974         tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
 975                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
 976         if (tmp < 0 ) {
 977                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
 978                 release_sock(sk);
 979                 DPRINTF((DBG_TCP, "tcp_write: return 6\n"));
 980                 if (copied) return(copied);
 981                 return(tmp);
 982         }
 983         skb->len += tmp;
 984         skb->dev = dev;
 985         buff += tmp;
 986         skb->h.th =(struct tcphdr *) buff;
             /*
              * The last argument is the number of bytes still to be written
              * after this segment; tcp_build_header() sets PSH when it is 0.
              */
 987         tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
 988         if (tmp < 0) {
 989                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
 990                 release_sock(sk);
 991                 DPRINTF((DBG_TCP, "tcp_write: return 7\n"));
 992                 if (copied) return(copied);
 993                 return(tmp);
 994         }
 995 
 996         if (flags & MSG_OOB) {
 997                 ((struct tcphdr *)buff)->urg = 1;
                     /*
                      * NOTE(review): this is a host-to-network conversion;
                      * ntohs() performs the same swap as htons(), which
                      * would be the conventional spelling.
                      */
 998                 ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
 999         }
1000         skb->len += tmp;
1001         memcpy_fromfs(buff+tmp, from, copy);
1002 
1003         from += copy;
1004         copied += copy;
1005         len -= copy;
1006         skb->len += copy;
1007         skb->free = 0;
1008         sk->send_seq += copy;
1009 
             /* A buffer allocated as a partial packet is parked, not sent. */
1010         if (send_tmp != NULL) {
1011                 enqueue_partial(send_tmp, sk);
1012                 continue;
1013         }
1014 
1015         tcp_send_check((struct tcphdr *)buff, sk->saddr, sk->daddr,
1016                         copy + sizeof(struct tcphdr), sk);
1017 
1018         skb->h.seq = sk->send_seq;
             /*
              * Hold the segment on the write queue (wfront..wback) when it
              * ends beyond the window, when a retransmit is in progress
              * with the write timer running, or when the congestion window
              * is full.  Otherwise pass it to queue_xmit() now.
              */
1019         if (after(sk->send_seq , sk->window_seq) ||
1020                   (sk->retransmits && sk->timeout == TIME_WRITE) ||
1021                   sk->packets_out >= sk->cong_window) {
1022                 DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
1023                                         sk->cong_window, sk->packets_out));
1024                 DPRINTF((DBG_TCP, "sk->send_seq = %d, sk->window_seq = %d\n",
1025                                         sk->send_seq, sk->window_seq));
1026                 skb->next = NULL;
1027                 skb->magic = TCP_WRITE_QUEUE_MAGIC;
1028                 if (sk->wback == NULL) {
1029                         sk->wfront = skb;
1030                 } else {
1031                         sk->wback->next = skb;
1032                 }
1033                 sk->wback = skb;
                     /*
                      * The head of the write queue lies beyond the window,
                      * nothing is waiting to be retransmitted and no ack is
                      * owed: start the TIME_PROBE0 timer.
                      */
1034                 if (before(sk->window_seq, sk->wfront->h.seq) &&
1035                    sk->send_head == NULL &&
1036                    sk->ack_backlog == 0)
1037                         reset_timer(sk, TIME_PROBE0, 
1038                             backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
1039         } else {
1040                 prot->queue_xmit(sk, dev, skb,0);
1041         }
1042   }
     /* Everything the caller asked for has been accepted. */
1043   sk->err = 0;
1044 
1045 /*
1046  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1047  *      interactive fast network servers. It's meant to be on and
1048  *      it really improves the throughput though not the echo time
1049  *      on my slow slip link - Alan
1050  */
1051 
1052   /* Avoid possible race on send_tmp - c/o Johannes Stille */
     /*
      * Flush a parked partial packet when nothing is outstanding, or, with
      * sk->nonagle set, whenever send_seq is still before window_seq.
      */
1053   if(sk->send_tmp && 
1054      ((!sk->packets_out) 
1055      /* If not nagling we can send on the before case too.. */
1056       || (sk->nonagle && before(sk->send_seq , sk->window_seq))
1057       ))
1058         tcp_send_partial(sk);
1059   /* -- */
1060   release_sock(sk);
1061   DPRINTF((DBG_TCP, "tcp_write: return 8\n"));
1062   return(copied);
1063 }
1064 
1065 
1066 static int
1067 tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1068            int len, int nonblock, unsigned flags,
1069            struct sockaddr_in *addr, int addr_len)
1070 {
1071   struct sockaddr_in sin;
1072 
1073   if (addr_len < sizeof(sin)) return(-EINVAL);
1074   memcpy_fromfs(&sin, addr, sizeof(sin));
1075   if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL);
1076   if (sin.sin_port != sk->dummy_th.dest) return(-EINVAL);
1077   if (sin.sin_addr.s_addr != sk->daddr) return(-EINVAL);
1078   return(tcp_write(sk, from, len, nonblock, flags));
1079 }
1080 
1081 
1082 static void
1083 tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1084 {
1085   int tmp;
1086   struct device *dev = NULL;
1087   struct tcphdr *t1;
1088   struct sk_buff *buff;
1089 
1090   DPRINTF((DBG_TCP, "in tcp read wakeup\n"));
1091   if (!sk->ack_backlog) return;
1092 
1093   /*
1094    * FIXME: we need to put code here to prevent this routine from
1095    * being called.  Being called once in a while is ok, so only check
1096    * if this is the second time in a row.
1097    */
1098 
1099   /*
1100    * We need to grab some memory, and put together an ack,
1101    * and then put it into the queue to be sent.
1102    */
1103   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1104   if (buff == NULL) {
1105         /* Try again real soon. */
1106         reset_timer(sk, TIME_WRITE, 10);
1107         return;
1108   }
1109 
1110   buff->mem_addr = buff;
1111   buff->mem_len = MAX_ACK_SIZE;
1112   buff->len = sizeof(struct tcphdr);
1113   buff->sk = sk;
1114 
1115   /* Put in the IP header and routing stuff. */
1116   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1117                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1118   if (tmp < 0) {
1119         buff->free=1;
1120         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1121         return;
1122   }
1123 
1124   buff->len += tmp;
1125   t1 =(struct tcphdr *)(buff->data +tmp);
1126 
1127   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1128   t1->seq = ntohl(sk->send_seq);
1129   t1->ack = 1;
1130   t1->res1 = 0;
1131   t1->res2 = 0;
1132   t1->rst = 0;
1133   t1->urg = 0;
1134   t1->syn = 0;
1135   t1->psh = 0;
1136   sk->ack_backlog = 0;
1137   sk->bytes_rcv = 0;
1138   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1139   t1->window = ntohs(sk->window);
1140   t1->ack_seq = ntohl(sk->acked_seq);
1141   t1->doff = sizeof(*t1)/4;
1142   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1143   sk->prot->queue_xmit(sk, dev, buff, 1);
1144 }
1145 
1146 
1147 /*
1148  * FIXME:
1149  * This routine frees used buffers.
1150  * It should consider sending an ACK to let the
1151  * other end know we now have a bigger window.
1152  */
1153 static void
1154 cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1155 {
1156   unsigned long flags;
1157   int left;
1158   struct sk_buff *skb;
1159 
1160   if(sk->debug)
1161         printk("cleaning rbuf for sk=%p\n", sk);
1162   
1163   save_flags(flags);
1164   cli();
1165   
1166   left = sk->prot->rspace(sk);
1167  
1168   /*
1169    * We have to loop through all the buffer headers,
1170    * and try to free up all the space we can.
1171    */
1172   while((skb=skb_peek(&sk->rqueue)) != NULL ) 
1173   {
1174         if (!skb->used) 
1175                 break;
1176         skb_unlink(skb);
1177         skb->sk = sk;
1178         kfree_skb(skb, FREE_READ);
1179   }
1180 
1181   restore_flags(flags);
1182 
1183   /*
1184    * FIXME:
1185    * At this point we should send an ack if the difference
1186    * in the window, and the amount of space is bigger than
1187    * TCP_WINDOW_DIFF.
1188    */
1189   DPRINTF((DBG_TCP, "sk->window left = %d, sk->prot->rspace(sk)=%d\n",
1190                         sk->window - sk->bytes_rcv, sk->prot->rspace(sk)));
1191 
1192   if(sk->debug)
1193         printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk),
1194                                             left);
1195   if (sk->prot->rspace(sk) != left) 
1196   {
1197         /*
1198          * This area has caused the most trouble.  The current strategy
1199          * is to simply do nothing if the other end has room to send at
1200          * least 3 full packets, because the ack from those will auto-
1201          * matically update the window.  If the other end doesn't think
1202          * we have much space left, but we have room for atleast 1 more
1203          * complete packet than it thinks we do, we will send an ack
1204          * immediatedly.  Otherwise we will wait up to .5 seconds in case
1205          * the user reads some more.
1206          */
1207         sk->ack_backlog++;
1208         if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) {
1209                 /* Send an ack right now. */
1210                 tcp_read_wakeup(sk);
1211         } else {
1212                 /* Force it to send an ack soon. */
1213                 int was_active = del_timer(&sk->timer);
1214                 if (!was_active || TCP_ACK_TIME < sk->timer.expires) {
1215                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1216                 } else
1217                         add_timer(&sk->timer);
1218         }
1219   }
1220 } 
1221 
1222 
1223 /*
      * Handle reading urgent data.
      *
      * Reached from tcp_read() when MSG_OOB is set; tcp_read() has already
      * run verify_area() on the destination.
      *
      *   sk       - socket to read from; claimed (sk->inuse = 1) while we
      *              work and handed back with release_sock() before every
      *              return and before sleeping.
      *   nonblock - when non-zero, return -EAGAIN instead of sleeping.
      *   to       - destination buffer; written with memcpy_tofs().
      *   len      - size of the destination buffer.
      *   flags    - with MSG_PEEK the urgent data is copied but stays unread.
      *
      * The urgent data of the first queued buffer that has its URG bit set
      * and has not been read yet is copied, at most len bytes, and the count
      * is returned.  When nothing is copied the result is 0 or a negative
      * errno: the pending sk->err, -ENOTCONN, -EAGAIN or -ERESTARTSYS.
      *
      * copied is only changed immediately before the final return, so it is
      * still 0 whenever the tests inside the wait loop look at it.
      */
1224 static int
1225 tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1226              unsigned char *to, int len, unsigned flags)
1227 {
1228   int copied = 0;
1229   struct sk_buff *skb;
1230 
1231   DPRINTF((DBG_TCP, "tcp_read_urg(sk=%X, to=%X, len=%d, flags=%X)\n",
1232                                         sk, to, len, flags));
1233 
1234   while(len > 0) 
1235   {
1236         sk->inuse = 1;
             /* Wait until urgent data is pending and the queue is not empty. */
1237         while(sk->urg==0 || skb_peek(&sk->rqueue) == NULL) {
1238                 if (sk->err) {
1239                         int tmp;
1240 
1241                         release_sock(sk);
1242                         if (copied) return(copied);
1243                         tmp = -sk->err;
1244                         sk->err = 0;
1245                         return(tmp);
1246                 }
1247 
                     /*
                      * Closed or already finished: report end of file (0)
                      * once, marking the socket done, and -ENOTCONN on any
                      * later call.
                      */
1248                 if (sk->state == TCP_CLOSE || sk->done) {
1249                         release_sock(sk);
1250                         if (copied) return(copied);
1251                         if (!sk->done) {
1252                                 sk->done = 1;
1253                                 return(0);
1254                         }
1255                         return(-ENOTCONN);
1256                 }
1257                  
1258                 if (sk->shutdown & RCV_SHUTDOWN) {
1259                         release_sock(sk);
1260                         if (copied == 0) 
1261                                 sk->done = 1;
1262                         return(copied);
1263                 }
1264 
1265                 if (nonblock || copied) {
1266                         release_sock(sk);
1267                         if (copied) return(copied);
1268                         return(-EAGAIN);
1269                 }
1270 
1271                 /* Now at this point, we may have gotten some data. */
                     /*
                      * Give the socket up and test once more with interrupts
                      * off before sleeping, presumably so a wakeup between
                      * the test and the sleep is not lost.  On a signal we
                      * return without re-claiming the socket.
                      */
1272                 release_sock(sk);
1273                 cli();
1274                 if ((sk->urg == 0 || skb_peek(&sk->rqueue) == NULL) &&
1275                     sk->err == 0 && !(sk->shutdown & RCV_SHUTDOWN)) {
1276                         interruptible_sleep_on(sk->sleep);
1277                         if (current->signal & ~current->blocked) {
1278                                 sti();
1279                                 if (copied) return(copied);
1280                                 return(-ERESTARTSYS);
1281                         }
1282                 }
1283                 sk->inuse = 1;
1284                 sti();
1285         }
1286 
             /*
              * Walk the receive queue once, starting at its head and
              * stopping when ->next leads back to sk->rqueue, looking for
              * the first buffer with unread urgent data.
              *
              * NOTE(review): if sk->urg is non-zero but no queued buffer
              * qualifies, nothing here changes len or sk->urg, so the outer
              * loop appears to repeat without sleeping.
              */
1287         skb = skb_peek(&sk->rqueue);
1288         do {
1289                 int amt;
1290 
1291                 if (skb->h.th->urg && !skb->urg_used) {
                             /*
                              * An urgent pointer of 0 is replaced by the
                              * whole of skb->len.  ntohs() is used for this
                              * host-to-network conversion; it performs the
                              * same swap as htons().
                              */
1292                         if (skb->h.th->urg_ptr == 0) {
1293                                 skb->h.th->urg_ptr = ntohs(skb->len);
1294                         }
1295                         amt = min(ntohs(skb->h.th->urg_ptr),len);
                             /* The data starts doff*4 bytes after the TCP header begins. */
1296                         if(amt)
1297                         {
1298                                 memcpy_tofs(to,(unsigned char *)(skb->h.th) +
1299                                                         skb->h.th->doff*4, amt);
1300                         }
1301 
                             /* A real read consumes the urgent data; a peek does not. */
1302                         if (!(flags & MSG_PEEK)) {
1303                                 skb->urg_used = 1;
1304                                 sk->urg--;
1305                         }
1306                         release_sock(sk);
1307                         copied += amt;
1308                         return(copied);
1309                 }
1310                 skb =(struct sk_buff *)skb->next;
1311         } while(skb != sk->rqueue);
1312   }
1313 /*sk->urg = 0;*/
     /* Only reached when len was not positive on entry. */
1314   release_sock(sk);
1315   return(0);
1316 }
1317 
1318 
1319 /*
      * This routine copies from a sock struct into the user buffer.
      *
      *   sk       - socket to read from; claimed (sk->inuse = 1) while we
      *              work and handed back with release_sock() before every
      *              return and before sleeping.
      *   to       - destination buffer, checked with verify_area() and
      *              written with memcpy_tofs().
      *   len      - size of the destination buffer.
      *   nonblock - when non-zero, return -EAGAIN instead of sleeping.
      *   flags    - MSG_OOB passes the call on to tcp_read_urg(); MSG_PEEK
      *              copies data without marking it consumed.
      *
      * Returns the number of bytes copied.  When nothing was copied the
      * result is 0 (len == 0, first read after close, receive side shut
      * down) or a negative errno: -EINVAL for a negative len, the
      * verify_area() error, -ENOTCONN, the pending sk->err, -EAGAIN,
      * -ERESTARTSYS, or -EINTR together with a SIGURG when unread urgent
      * data is in the way.
      */
1320 static int
1321 tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1322          int len, int nonblock, unsigned flags)
1323 {
1324   int copied=0; /* will be used to say how much has been copied. */
1325   struct sk_buff *skb;
1326   unsigned long offset;
1327   unsigned long used;
1328   int err;
1329 
1330   if (len == 0) return(0);
1331   if (len < 0) {
1332         return(-EINVAL);
1333   }
1334     
1335   err=verify_area(VERIFY_WRITE,to,len);
1336   if(err)
1337         return err;
1338         
1339   /* This error should be checked. */
1340   if (sk->state == TCP_LISTEN) return(-ENOTCONN);
1341 
1342   /* Urgent data needs to be handled specially. */
1343   if ((flags & MSG_OOB)) 
1344         return(tcp_read_urg(sk, nonblock, to, len, flags));
1345 
1346   /* So no-one else will use this socket. */
1347   sk->inuse = 1;
1348   
1349   skb=skb_peek(&sk->rqueue);
1350 
1351   DPRINTF((DBG_TCP, "tcp_read(sk=%X, to=%X, len=%d, nonblock=%d, flags=%X)\n",
1352                                                 sk, to, len, nonblock, flags));
1353 
1354   while(len > 0) {
1355         /* skb->used just checks to see if we've gone all the way around. */
1356         
1357         /* While no data, or first data indicates some is missing, or data is used */
1358         while(skb == NULL ||
1359               before(sk->copied_seq+1, skb->h.th->seq) || skb->used) {
1360                 DPRINTF((DBG_TCP, "skb = %X:\n", skb));
                     /* Free what has been consumed so far; this may also send an ACK. */
1361                 cleanup_rbuf(sk);
1362                 if (sk->err) 
1363                 {
1364                         int tmp;
1365 
1366                         release_sock(sk);
1367                         if (copied) 
1368                         {
1369                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1370                                                                         copied));
1371                                 return(copied);
1372                         }
1373                         tmp = -sk->err;
1374                         sk->err = 0;
1375                         return(tmp);
1376                 }
1377 
                     /*
                      * Connection closed: report end of file (0) once,
                      * marking the socket done, and -ENOTCONN afterwards.
                      */
1378                 if (sk->state == TCP_CLOSE) 
1379                 {
1380                         release_sock(sk);
1381                         if (copied) {
1382                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1383                                                                 copied));
1384                                 return(copied);
1385                         }
1386                         if (!sk->done) {
1387                                 sk->done = 1;
1388                                 return(0);
1389                         }
1390                         return(-ENOTCONN);
1391                 }
1392 
1393                 if (sk->shutdown & RCV_SHUTDOWN) 
1394                 {
1395                         release_sock(sk);
1396                         if (copied == 0) sk->done = 1;
1397                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1398                         return(copied);
1399                 }
1400                         
                     /* Never sleep once some data has been copied. */
1401                 if (nonblock || copied) 
1402                 {
1403                         release_sock(sk);
1404                         if(sk->debug)
1405                                 printk("read: EAGAIN\n");
1406                         if (copied) 
1407                         {
1408                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1409                                                                 copied));
1410                                 return(copied);
1411                         }
1412                         return(-EAGAIN);
1413                 }
1414 
                     /*
                      * NOTE(review): copied is always 0 here, because the
                      * test just above has already returned otherwise, so
                      * this branch cannot be taken.
                      */
1415                 if ((flags & MSG_PEEK) && copied != 0) 
1416                 {
1417                         release_sock(sk);
1418                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1419                         return(copied);
1420                 }
1421                  
1422                 DPRINTF((DBG_TCP, "tcp_read about to sleep. state = %d\n",
1423                                                                 sk->state));
1424                 release_sock(sk);
1425 
1426                 /*
1427                  * Now we may have some data waiting or we could
1428                  * have changed state.
1429                  */
1430                 cli();
                     /* Shut down or in error: go round again so the tests above report it. */
1431                 if (sk->shutdown & RCV_SHUTDOWN || sk->err != 0) {
1432                         sk->inuse = 1;
1433                         sti();
1434                         continue;
1435                 }
1436 
                     /*
                      * Sleep only while the queue is empty or its first
                      * buffer starts beyond the next byte we want.  On a
                      * signal we return without re-claiming the socket.
                      */
1437                 if (skb_peek(&sk->rqueue) == NULL ||
1438                     before(sk->copied_seq+1, sk->rqueue->h.th->seq)) {
1439                         if(sk->debug)
1440                                 printk("Read wait sleep\n");
1441                         interruptible_sleep_on(sk->sleep);
1442                         if(sk->debug)
1443                                 printk("Read wait wakes\n");
1444                         if (current->signal & ~current->blocked) {
1445                                 sti();
1446                                 if (copied) {
1447                                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1448                                                                 copied));
1449                                         return(copied);
1450                                 }
1451                                 return(-ERESTARTSYS);
1452                         }
1453                 }
1454                 sk->inuse = 1;
1455                 sti();
1456                 DPRINTF((DBG_TCP, "tcp_read woke up. \n"));
1457 
1458 
1459                 skb=skb_peek(&sk->rqueue);
1460                 /* That may have been null if we were beaten, if so we loop again */
1461         }
1462 
1463         /*
1464          * Copy anything from the current block that needs
1465          * to go into the user buffer.
1466          */
              /*
               * offset is how far into this buffer's data the next wanted
               * byte (sequence number copied_seq+1) lies.  A SYN takes up
               * one sequence number but no data, hence the adjustment.
               */
1467          offset = sk->copied_seq+1 - skb->h.th->seq;
1468   
1469          if (skb->h.th->syn) offset--;
1470          if (offset < skb->len) /* Some of the packet is useful */
1471          {
1472                 /*
1473                  * If there is urgent data we must either
1474                  * return or skip over it.
1475                  */
1476                 if (skb->h.th->urg) 
1477                 {
1478                         if (skb->urg_used) 
1479                         {
                                     /*
                                      * The urgent bytes were already marked
                                      * read (tcp_read_urg() sets urg_used):
                                      * step over them.
                                      * NOTE(review): copied_seq is advanced
                                      * here even when MSG_PEEK is set.
                                      */
1480                                 sk->copied_seq += ntohs(skb->h.th->urg_ptr);
1481                                 offset += ntohs(skb->h.th->urg_ptr);
1482                                 if (offset >= skb->len) 
1483                                 {
1484                                         skb->used = 1;
1485                                         skb =(struct sk_buff *)skb->next;
1486                                         continue;
1487                                 }
1488                         } 
1489                         else 
1490                         {
                                     /*
                                      * Unread urgent data is in the way:
                                      * return what we have, or raise SIGURG
                                      * and fail with -EINTR.
                                      */
1491                                 release_sock(sk);
1492                                 if (copied) 
1493                                         return(copied);
1494                                 send_sig(SIGURG, current, 0);
1495                                 return(-EINTR);
1496                         }
1497                 }
1498                 /* Ok so how much can we use ? */
1499                 used = min(skb->len - offset, len);
1500                 /* Copy it */
                     /* The data starts doff*4 bytes after the TCP header begins. */
1501                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1502                             skb->h.th->doff*4 + offset, used);
1503                 copied += used;
1504                 len -= used;
1505                 to += used;
1506                 
1507                 /* If we were reading the data is 'eaten' */
1508                 if (!(flags & MSG_PEEK)) 
1509                         sk->copied_seq += used;
1510               
1511                 /*
1512                  * Mark this data used if we are really reading it,
1513                  * and if it doesn't contain any urgent data. And we
1514                  * have used all the data.
1515                  */
1516                 if (!(flags & MSG_PEEK) &&
1517                    (!skb->h.th->urg || skb->urg_used) &&
1518                    (used + offset >= skb->len)) 
1519                         skb->used = 1;
1520               
1521                 /*
1522                  * See if this is the end of a message or if the
1523                  * remaining data is urgent.
1524                  */
                     /* Only the URG bit stops the read here; the PSH test is disabled. */
1525                 if (/*skb->h.th->psh || */skb->h.th->urg) 
1526                 {
1527                         break;
1528                 }
1529         } 
1530         else 
1531         {       /* already used this data, must be a retransmit */
1532                 skb->used = 1;
1533         }
1534         /* Move along a packet */
1535         skb =(struct sk_buff *)skb->next;
1536   }
1537   /* Clean up data we have read: This will do ACK frames */
1538   cleanup_rbuf(sk);
1539   release_sock(sk);
1540   DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1541   if (copied == 0 && nonblock) 
1542         return(-EAGAIN);
1543   return(copied);
1544 }
1545 
1546   
1547 /*
1548  * Send a FIN without closing the connection.
1549  * Not called at interrupt time.
1550  */
1551 void
1552 tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1553 {
1554   struct sk_buff *buff;
1555   struct tcphdr *t1, *th;
1556   struct proto *prot;
1557   int tmp;
1558   struct device *dev = NULL;
1559 
1560   /*
1561    * We need to grab some memory, and put together a FIN,
1562    * and then put it into the queue to be sent.
1563    * FIXME:
1564    *    Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1565    *    Most of this is guesswork, so maybe it will work...
1566    */
1567   /* If we've already sent a FIN, return. */
1568   if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) return;
1569   if (!(how & SEND_SHUTDOWN)) return;
1570   sk->inuse = 1;
1571 
1572   /* Clear out any half completed packets. */
1573   if (sk->send_tmp) tcp_send_partial(sk);
1574 
1575   prot =(struct proto *)sk->prot;
1576   th =(struct tcphdr *)&sk->dummy_th;
1577   release_sock(sk); /* incase the malloc sleeps. */
1578   buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1579   if (buff == NULL) return;
1580   sk->inuse = 1;
1581 
1582   DPRINTF((DBG_TCP, "tcp_shutdown_send buff = %X\n", buff));
1583   buff->mem_addr = buff;
1584   buff->mem_len = MAX_RESET_SIZE;
1585   buff->sk = sk;
1586   buff->len = sizeof(*t1);
1587   t1 =(struct tcphdr *) buff->data;
1588 
1589   /* Put in the IP header and routing stuff. */
1590   tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1591                            IPPROTO_TCP, sk->opt,
1592                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1593   if (tmp < 0) {
1594         buff->free=1;
1595         prot->wfree(sk,buff->mem_addr, buff->mem_len);
1596         release_sock(sk);
1597         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
1598         return;
1599   }
1600 
1601   t1 =(struct tcphdr *)((char *)t1 +tmp);
1602   buff->len += tmp;
1603   buff->dev = dev;
1604   memcpy(t1, th, sizeof(*t1));
1605   t1->seq = ntohl(sk->send_seq);
1606   sk->send_seq++;
1607   buff->h.seq = sk->send_seq;
1608   t1->ack = 1;
1609   t1->ack_seq = ntohl(sk->acked_seq);
1610   t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1611   t1->fin = 1;
1612   t1->rst = 0;
1613   t1->doff = sizeof(*t1)/4;
1614   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1615 
1616   /*
1617    * Can't just queue this up.
1618    * It should go at the end of the write queue.
1619    */
1620   if (sk->wback != NULL) {
1621         buff->free=0;   
1622         buff->next = NULL;
1623         sk->wback->next = buff;
1624         sk->wback = buff;
1625         buff->magic = TCP_WRITE_QUEUE_MAGIC;
1626   } else {
1627         sk->prot->queue_xmit(sk, dev, buff, 0);
1628   }
1629 
1630   if (sk->state == TCP_ESTABLISHED) sk->state = TCP_FIN_WAIT1;
1631     else sk->state = TCP_FIN_WAIT2;
1632 
1633   release_sock(sk);
1634 }
1635 
1636 
1637 static int
1638 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1639              int to_len, int nonblock, unsigned flags,
1640              struct sockaddr_in *addr, int *addr_len)
1641 {
1642   struct sockaddr_in sin;
1643   int len;
1644   int err;
1645   int result;
1646   
1647   /* Have to check these first unlike the old code. If 
1648      we check them after we lose data on an error
1649      which is wrong */
1650   err = verify_area(VERIFY_WRITE,addr_len,sizeof(long));
1651   if(err)
1652         return err;
1653   len = get_fs_long(addr_len);
1654   if(len > sizeof(sin))
1655         len = sizeof(sin);
1656   err=verify_area(VERIFY_WRITE, addr, len);  
1657   if(err)
1658         return err;
1659         
1660   result=tcp_read(sk, to, to_len, nonblock, flags);
1661 
1662   if (result < 0) return(result);
1663   
1664   sin.sin_family = AF_INET;
1665   sin.sin_port = sk->dummy_th.dest;
1666   sin.sin_addr.s_addr = sk->daddr;
1667 
1668   memcpy_tofs(addr, &sin, len);
1669   put_fs_long(len, addr_len);
1670   return(result);
1671 }
1672 
1673 
1674 /* This routine will send an RST to the other tcp. */
1675 static void
1676 tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1677           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1678 {
1679   struct sk_buff *buff;
1680   struct tcphdr *t1;
1681   int tmp;
1682 
1683   /*
1684    * We need to grab some memory, and put together an RST,
1685    * and then put it into the queue to be sent.
1686    */
1687   buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1688   if (buff == NULL) 
1689         return;
1690 
1691   DPRINTF((DBG_TCP, "tcp_reset buff = %X\n", buff));
1692   buff->mem_addr = buff;
1693   buff->mem_len = MAX_RESET_SIZE;
1694   buff->len = sizeof(*t1);
1695   buff->sk = NULL;
1696   buff->dev = dev;
1697 
1698   t1 =(struct tcphdr *) buff->data;
1699 
1700   /* Put in the IP header and routing stuff. */
1701   tmp = prot->build_header(buff, saddr, daddr, &dev, IPPROTO_TCP, opt,
1702                            sizeof(struct tcphdr),tos,ttl);
1703   if (tmp < 0) {
1704         buff->free = 1;
1705         prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1706         return;
1707   }
1708   t1 =(struct tcphdr *)((char *)t1 +tmp);
1709   buff->len += tmp;
1710   memcpy(t1, th, sizeof(*t1));
1711 
1712   /* Swap the send and the receive. */
1713   t1->dest = th->source;
1714   t1->source = th->dest;
1715   t1->rst = 1;  
1716   t1->window = 0;
1717   
1718   if(th->ack)
1719   {
1720         t1->ack=0;
1721         t1->seq=th->ack_seq;
1722         t1->ack_seq=0;
1723   }
1724   else
1725   {
1726         t1->ack=1;
1727         if(!th->syn)
1728                 t1->ack_seq=htonl(th->seq);
1729         else
1730                 t1->ack_seq=htonl(th->seq+1);
1731         t1->seq=0;
1732   }
1733 
1734   t1->syn = 0;
1735   t1->urg = 0;
1736   t1->fin = 0;
1737   t1->psh = 0;
1738   t1->doff = sizeof(*t1)/4;
1739   tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1740   prot->queue_xmit(NULL, dev, buff, 1);
1741 }
1742 
1743 
1744 /*
1745  *      Look for tcp options. Parses everything but only knows about MSS
1746  */
1747  
1748 static void
1749 tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1750 {
1751   unsigned char *ptr;
1752   int length=(th->doff*4)-sizeof(struct tcphdr);
1753     
1754   ptr = (unsigned char *)(th + 1);
1755   
1756   while(length>0)
1757   {
1758         int opcode=*ptr++;
1759         int opsize=*ptr++;
1760         switch(opcode)
1761         {
1762                 case TCPOPT_EOL:
1763                         return;
1764                 case TCPOPT_NOP:
1765                         length-=2;
1766                         continue;
1767                 
1768                 default:
1769                         if(opsize<=2)   /* Avoid silly options looping forever */
1770                                 return;
1771                         switch(opcode)
1772                         {
1773                                 case TCPOPT_MSS:
1774                                         if(opsize==4)
1775                                         {
1776                                                 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1777                                         }
1778                                         break;
1779                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1780                         }
1781                         ptr+=opsize-2;
1782                         length-=opsize;
1783         }
1784   }
1785 
1786 }
1787 
1788 /*
1789  * This routine handles a connection request.
1790  * It should make sure we haven't already responded.
1791  * Because of the way BSD works, we have to send a syn/ack now.
1792  * This also means it will be harder to close a socket which is
1793  * listening.
1794  */
1795 static void
1796 tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1797                  unsigned long daddr, unsigned long saddr,
1798                  struct options *opt, struct device *dev)
1799 {
1800   struct sk_buff *buff;
1801   struct tcphdr *t1;
1802   unsigned char *ptr;
1803   struct sock *newsk;
1804   struct tcphdr *th;
1805   int tmp;
1806 
1807   DPRINTF((DBG_TCP, "tcp_conn_request(sk = %X, skb = %X, daddr = %X, sadd4= %X, \n"
1808           "                  opt = %X, dev = %X)\n",
1809           sk, skb, daddr, saddr, opt, dev));
1810   
1811   th = skb->h.th;
1812 
1813   /* If the socket is dead, don't accept the connection. */
1814   if (!sk->dead) {
1815         sk->data_ready(sk,0);
1816   } else {
1817         DPRINTF((DBG_TCP, "tcp_conn_request on dead socket\n"));
1818         tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1819         kfree_skb(skb, FREE_READ);
1820         return;
1821   }
1822 
1823   /*
1824    * Make sure we can accept more.  This will prevent a
1825    * flurry of syns from eating up all our memory.
1826    */
1827   if (sk->ack_backlog >= sk->max_ack_backlog) {
1828         kfree_skb(skb, FREE_READ);
1829         return;
1830   }
1831 
1832   /*
1833    * We need to build a new sock struct.
1834    * It is sort of bad to have a socket without an inode attached
1835    * to it, but the wake_up's will just wake up the listening socket,
1836    * and if the listening socket is destroyed before this is taken
1837    * off of the queue, this will take care of it.
1838    */
1839   newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1840   if (newsk == NULL) {
1841         /* just ignore the syn.  It will get retransmitted. */
1842         kfree_skb(skb, FREE_READ);
1843         return;
1844   }
1845 
1846   DPRINTF((DBG_TCP, "newsk = %X\n", newsk));
1847   memcpy((void *)newsk,(void *)sk, sizeof(*newsk));
1848   newsk->wback = NULL;
1849   newsk->wfront = NULL;
1850   newsk->rqueue = NULL;
1851   newsk->send_head = NULL;
1852   newsk->send_tail = NULL;
1853   newsk->back_log = NULL;
1854   newsk->rtt = TCP_CONNECT_TIME;
1855   newsk->mdev = 0;
1856   newsk->backoff = 0;
1857   newsk->blog = 0;
1858   newsk->intr = 0;
1859   newsk->proc = 0;
1860   newsk->done = 0;
1861   newsk->send_tmp = NULL;
1862   newsk->pair = NULL;
1863   newsk->wmem_alloc = 0;
1864   newsk->rmem_alloc = 0;
1865 
1866   newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1867 
1868   newsk->err = 0;
1869   newsk->shutdown = 0;
1870   newsk->ack_backlog = 0;
1871   newsk->acked_seq = skb->h.th->seq+1;
1872   newsk->fin_seq = skb->h.th->seq;
1873   newsk->copied_seq = skb->h.th->seq;
1874   newsk->state = TCP_SYN_RECV;
1875   newsk->timeout = 0;
1876   newsk->send_seq = jiffies * SEQ_TICK - seq_offset;
1877   newsk->rcv_ack_seq = newsk->send_seq;
1878   newsk->urg =0;
1879   newsk->retransmits = 0;
1880   newsk->destroy = 0;
1881   newsk->timer.data = (unsigned long)newsk;
1882   newsk->timer.function = &net_timer;
1883   newsk->dummy_th.source = skb->h.th->dest;
1884   newsk->dummy_th.dest = skb->h.th->source;
1885 
1886   /* Swap these two, they are from our point of view. */
1887   newsk->daddr = saddr;
1888   newsk->saddr = daddr;
1889 
1890   put_sock(newsk->num,newsk);
1891   newsk->dummy_th.res1 = 0;
1892   newsk->dummy_th.doff = 6;
1893   newsk->dummy_th.fin = 0;
1894   newsk->dummy_th.syn = 0;
1895   newsk->dummy_th.rst = 0;
1896   newsk->dummy_th.psh = 0;
1897   newsk->dummy_th.ack = 0;
1898   newsk->dummy_th.urg = 0;
1899   newsk->dummy_th.res2 = 0;
1900   newsk->acked_seq = skb->h.th->seq + 1;
1901   newsk->copied_seq = skb->h.th->seq;
1902 
1903   /* Grab the ttl and tos values and use them */
1904   newsk->ip_ttl=sk->ip_ttl;
1905   newsk->ip_tos=skb->ip_hdr->tos;
1906 
1907 /* use 512 or whatever user asked for */
1908 /* note use of sk->mss, since user has no direct access to newsk */
1909   if (sk->mss)
1910     newsk->mtu = sk->mss;
1911   else
1912     newsk->mtu = 576 - HEADER_SIZE;
1913 /* but not bigger than device MTU */
1914   newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
1915 
1916 /* this will min with what arrived in the packet */
1917   tcp_options(newsk,skb->h.th);
1918 
1919   buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
1920   if (buff == NULL) {
1921         sk->err = -ENOMEM;
1922         newsk->dead = 1;
1923         release_sock(newsk);
1924         kfree_skb(skb, FREE_READ);
1925         return;
1926   }
1927   
1928   buff->mem_addr = buff;
1929   buff->mem_len = MAX_SYN_SIZE;
1930   buff->len = sizeof(struct tcphdr)+4;
1931   buff->sk = newsk;
1932   
1933   t1 =(struct tcphdr *) buff->data;
1934 
1935   /* Put in the IP header and routing stuff. */
1936   tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &dev,
1937                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
1938 
1939   /* Something went wrong. */
1940   if (tmp < 0) {
1941         sk->err = tmp;
1942         buff->free=1;
1943         kfree_skb(buff,FREE_WRITE);
1944         newsk->dead = 1;
1945         release_sock(newsk);
1946         skb->sk = sk;
1947         kfree_skb(skb, FREE_READ);
1948         return;
1949   }
1950 
1951   buff->len += tmp;
1952   t1 =(struct tcphdr *)((char *)t1 +tmp);
1953   
1954   memcpy(t1, skb->h.th, sizeof(*t1));
1955   buff->h.seq = newsk->send_seq;
1956 
1957   /* Swap the send and the receive. */
1958   t1->dest = skb->h.th->source;
1959   t1->source = newsk->dummy_th.source;
1960   t1->seq = ntohl(newsk->send_seq++);
1961   t1->ack = 1;
1962   newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
1963   t1->window = ntohs(newsk->window);
1964   t1->res1 = 0;
1965   t1->res2 = 0;
1966   t1->rst = 0;
1967   t1->urg = 0;
1968   t1->psh = 0;
1969   t1->syn = 1;
1970   t1->ack_seq = ntohl(skb->h.th->seq+1);
1971   t1->doff = sizeof(*t1)/4+1;
1972 
1973   ptr =(unsigned char *)(t1+1);
1974   ptr[0] = 2;
1975   ptr[1] = 4;
1976   ptr[2] = ((newsk->mtu) >> 8) & 0xff;
1977   ptr[3] =(newsk->mtu) & 0xff;
1978 
1979   tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
1980   newsk->prot->queue_xmit(newsk, dev, buff, 0);
1981 
1982   reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_CONNECT_TIME);
1983   skb->sk = newsk;
1984 
1985   /* Charge the sock_buff to newsk. */
1986   sk->rmem_alloc -= skb->mem_len;
1987   newsk->rmem_alloc += skb->mem_len;
1988 
1989   skb_queue_tail(&sk->rqueue,skb);
1990   sk->ack_backlog++;
1991   release_sock(newsk);
1992 }
1993 
1994 
1995 static void
1996 tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
1997 {
1998   struct sk_buff *buff;
1999   int need_reset = 0;
2000   struct tcphdr *t1, *th;
2001   struct proto *prot;
2002   struct device *dev=NULL;
2003   int tmp;
2004 
2005   /*
2006    * We need to grab some memory, and put together a FIN,
2007    * and then put it into the queue to be sent.
2008    */
2009   DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));
2010   sk->inuse = 1;
2011   sk->keepopen = 1;
2012   sk->shutdown = SHUTDOWN_MASK;
2013 
2014   if (!sk->dead) 
2015         sk->state_change(sk);
2016 
2017   /* We need to flush the recv. buffs. */
2018   if (skb_peek(&sk->rqueue) != NULL) 
2019   {
2020         struct sk_buff *skb;
2021         if(sk->debug)
2022                 printk("Clean rcv queue\n");
2023         while((skb=skb_dequeue(&sk->rqueue))!=NULL)
2024         {
2025                 if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
2026                                 need_reset = 1;
2027                 kfree_skb(skb, FREE_READ);
2028         }
2029         if(sk->debug)
2030                 printk("Cleaned.\n");
2031   }
2032   sk->rqueue = NULL;
2033 
2034   /* Get rid off any half-completed packets. */
2035   if (sk->send_tmp) {
2036         tcp_send_partial(sk);
2037   }
2038 
2039   switch(sk->state) {
2040         case TCP_FIN_WAIT1:
2041         case TCP_FIN_WAIT2:
2042         case TCP_LAST_ACK:
2043                 /* start a timer. */
2044                 reset_timer(sk, TIME_CLOSE, 4 * sk->rtt);
2045                 if (timeout) tcp_time_wait(sk);
2046                 release_sock(sk);
2047                 return; /* break causes a double release - messy */
2048         case TCP_TIME_WAIT:
2049                 if (timeout) {
2050                   sk->state = TCP_CLOSE;
2051                 }
2052                 release_sock(sk);
2053                 return;
2054         case TCP_LISTEN:
2055                 sk->state = TCP_CLOSE;
2056                 release_sock(sk);
2057                 return;
2058         case TCP_CLOSE:
2059                 release_sock(sk);
2060                 return;
2061         case TCP_CLOSE_WAIT:
2062         case TCP_ESTABLISHED:
2063         case TCP_SYN_SENT:
2064         case TCP_SYN_RECV:
2065                 prot =(struct proto *)sk->prot;
2066                 th =(struct tcphdr *)&sk->dummy_th;
2067                 buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2068                 if (buff == NULL) {
2069                         /* This will force it to try again later. */
2070                         /* Or it would have if someone released the socket
2071                            first. Anyway it might work now */
2072                         release_sock(sk);
2073                         if (sk->state != TCP_CLOSE_WAIT)
2074                                         sk->state = TCP_ESTABLISHED;
2075                         reset_timer(sk, TIME_CLOSE, 100);
2076                         return;
2077                 }
2078                 buff->mem_addr = buff;
2079                 buff->mem_len = MAX_FIN_SIZE;
2080                 buff->sk = sk;
2081                 buff->free = 1;
2082                 buff->len = sizeof(*t1);
2083                 t1 =(struct tcphdr *) buff->data;
2084 
2085                 /* Put in the IP header and routing stuff. */
2086                 tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2087                                          IPPROTO_TCP, sk->opt,
2088                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2089                 if (tmp < 0) {
2090                         kfree_skb(buff,FREE_WRITE);
2091                         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
2092                         release_sock(sk);
2093                         return;
2094                 }
2095 
2096                 t1 =(struct tcphdr *)((char *)t1 +tmp);
2097                 buff->len += tmp;
2098                 buff->dev = dev;
2099                 memcpy(t1, th, sizeof(*t1));
2100                 t1->seq = ntohl(sk->send_seq);
2101                 sk->send_seq++;
2102                 buff->h.seq = sk->send_seq;
2103                 t1->ack = 1;
2104 
2105                 /* Ack everything immediately from now on. */
2106                 sk->delay_acks = 0;
2107                 t1->ack_seq = ntohl(sk->acked_seq);
2108                 t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2109                 t1->fin = 1;
2110                 t1->rst = need_reset;
2111                 t1->doff = sizeof(*t1)/4;
2112                 tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2113 
2114                 if (sk->wfront == NULL) {
2115                         prot->queue_xmit(sk, dev, buff, 0);
2116                 } else {
2117                         reset_timer(sk, TIME_WRITE,
2118                           backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
2119                         buff->next = NULL;
2120                         if (sk->wback == NULL) {
2121                                 sk->wfront=buff;
2122                         } else {
2123                                 sk->wback->next = buff;
2124                         }
2125                         sk->wback = buff;
2126                         buff->magic = TCP_WRITE_QUEUE_MAGIC;
2127                 }
2128 
2129                 if (sk->state == TCP_CLOSE_WAIT) {
2130                         sk->state = TCP_FIN_WAIT2;
2131                 } else {
2132                         sk->state = TCP_FIN_WAIT1;
2133         }
2134   }
2135   release_sock(sk);
2136 }
2137 
2138 
2139 /*
2140  * This routine takes stuff off of the write queue,
2141  * and puts it in the xmit queue.
2142  */
2143 static void
2144 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2145 {
2146   struct sk_buff *skb;
2147 
2148   DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));
2149 
2150   /* The bytes will have to remain here. In time closedown will
2151      empty the write queue and all will be happy */
2152   if(sk->zapped)
2153         return;
2154 
2155   while(sk->wfront != NULL &&
2156         before(sk->wfront->h.seq, sk->window_seq +1) &&
2157         (sk->retransmits == 0 ||
2158          sk->timeout != TIME_WRITE ||
2159          before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
2160         && sk->packets_out < sk->cong_window) {
2161                 skb = sk->wfront;
2162                 IS_SKB(skb);
2163                 sk->wfront =(struct sk_buff *)skb->next;
2164                 if (sk->wfront == NULL) sk->wback = NULL;
2165                 skb->next = NULL;
2166                 if (skb->magic != TCP_WRITE_QUEUE_MAGIC) {
2167                         printk("tcp.c skb with bad magic(%X) on write queue. Squashing "
2168                                 "queue\n", skb->magic);
2169                         sk->wfront = NULL;
2170                         sk->wback = NULL;
2171                         return;
2172                 }
2173                 skb->magic = 0;
2174                 DPRINTF((DBG_TCP, "Sending a packet.\n"));
2175 
2176                 /* See if we really need to send the packet. */
2177                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) {
2178                         sk->retransmits = 0;
2179                         kfree_skb(skb, FREE_WRITE);
2180                         if (!sk->dead) sk->write_space(sk);
2181                 } else {
2182                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2183                 }
2184         }
2185 }
2186 
2187 
2188 /*
2189  * This routine sorts the send list, and resets the
2190  * sk->send_head and sk->send_tail pointers.
2191  */
2192 void
2193 sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2194 {
2195   struct sk_buff *list = NULL;
2196   struct sk_buff *skb,*skb2,*skb3;
2197 
2198   for (skb = sk->send_head; skb != NULL; skb = skb2) {
2199         skb2 = (struct sk_buff *)skb->link3;
2200         if (list == NULL || before (skb2->h.seq, list->h.seq)) {
2201                 skb->link3 = list;
2202                 sk->send_tail = skb;
2203                 list = skb;
2204         } else {
2205                 for (skb3 = list; ; skb3 = (struct sk_buff *)skb3->link3) {
2206                         if (skb3->link3 == NULL ||
2207                             before(skb->h.seq, skb3->link3->h.seq)) {
2208                                 skb->link3 = skb3->link3;
2209                                 skb3->link3 = skb;
2210                                 if (skb->link3 == NULL) sk->send_tail = skb;
2211                                 break;
2212                         }
2213                 }
2214         }
2215   }
2216   sk->send_head = list;
2217 }
2218   
2219 
2220 /* This routine deals with incoming acks, but not outgoing ones. */
2221 static int
2222 tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2223 {
2224   unsigned long ack;
2225   int flag = 0;
2226 
2227   if(sk->zapped)
2228         return(1);      /* Dead, cant ack any more so why bother */
2229 
2230   ack = ntohl(th->ack_seq);
2231   DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
2232           "sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
2233           ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
2234 
2235   if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2236         sk->retransmits = 0;
2237 
2238   if (after(ack, sk->send_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
2239         if (after(ack, sk->send_seq) ||
2240            (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
2241                 return(0);
2242         }
2243         if (sk->keepopen) {
2244                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2245         }
2246         return(1);
2247   }
2248 
2249   if (len != th->doff*4) flag |= 1;
2250 
2251   /* See if our window has been shrunk. */
2252   if (after(sk->window_seq, ack+ntohs(th->window))) {
2253         /*
2254          * We may need to move packets from the send queue
2255          * to the write queue, if the window has been shrunk on us.
2256          * The RFC says you are not allowed to shrink your window
2257          * like this, but if the other end does, you must be able
2258          * to deal with it.
2259          */
2260         struct sk_buff *skb;
2261         struct sk_buff *skb2;
2262         struct sk_buff *wskb = NULL;
2263   
2264         skb2 = sk->send_head;
2265         sk->send_head = NULL;
2266         sk->send_tail = NULL;
2267 
2268         flag |= 4;
2269 
2270         sk->window_seq = ack + ntohs(th->window);
2271         cli();
2272         while (skb2 != NULL) {
2273                 skb = skb2;
2274                 skb2 = (struct sk_buff *)skb->link3;
2275                 skb->link3 = NULL;
2276                 if (after(skb->h.seq, sk->window_seq)) {
2277                         if (sk->packets_out > 0) sk->packets_out--;
2278                         /* We may need to remove this from the dev send list. */
2279                         if (skb->next != NULL) {
2280                                 skb_unlink(skb);                                
2281                         }
2282                         /* Now add it to the write_queue. */
2283                         skb->magic = TCP_WRITE_QUEUE_MAGIC;
2284                         if (wskb == NULL) {
2285                                 skb->next = sk->wfront;
2286                                 sk->wfront = skb;
2287                         } else {
2288                                 skb->next = wskb->next;
2289                                 wskb->next = skb;
2290                         }
2291                         if (sk->wback == wskb) sk->wback = skb;
2292                         wskb = skb;
2293                 } else {
2294                         if (sk->send_head == NULL) {
2295                                 sk->send_head = skb;
2296                                 sk->send_tail = skb;
2297                         } else {
2298                                 sk->send_tail->link3 = skb;
2299                                 sk->send_tail = skb;
2300                         }
2301                         skb->link3 = NULL;
2302                 }
2303         }
2304         sti();
2305   }
2306 
2307   if (sk->send_tail == NULL || sk->send_head == NULL) {
2308         sk->send_head = NULL;
2309         sk->send_tail = NULL;
2310         sk->packets_out= 0;
2311   }
2312 
2313   sk->window_seq = ack + ntohs(th->window);
2314 
2315   /* We don't want too many packets out there. */
2316   if (sk->timeout == TIME_WRITE && 
2317       sk->cong_window < 2048 && ack != sk->rcv_ack_seq) {
2318         if (sk->exp_growth) sk->cong_window *= 2;
2319           else sk->cong_window++;
2320   }
2321 
2322   DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
2323   sk->rcv_ack_seq = ack;
2324 
2325   /*
2326    * if this ack opens up a zero window, clear backoff.  It was
2327    * being used to time the probes, and is probably far higher than
2328    * it needs to be for normal retransmission
2329    */
2330   if (sk->timeout == TIME_PROBE0) {
2331         if (sk->wfront != NULL &&   /* should always be non-null */
2332             ! before (sk->window_seq, sk->wfront->h.seq)) {
2333           sk->retransmits = 0;
2334           sk->backoff = 0;
2335         }
2336   }
2337 
2338   /* See if we can take anything off of the retransmit queue. */
2339   while(sk->send_head != NULL) {
2340         /* Check for a bug. */
2341         if (sk->send_head->link3 &&
2342             after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
2343                 printk("INET: tcp.c: *** bug send_list out of order.\n");
2344                 sort_send(sk);
2345         }
2346 
2347         if (before(sk->send_head->h.seq, ack+1)) {
2348                 struct sk_buff *oskb;
2349 
2350                 if (sk->retransmits) {
2351 
2352                   /* if we're retransmitting, don't start any new
2353                    * packets until after everything in retransmit queue
2354                    * is acked.  That's as close as I can come at the
2355                    * moment to slow start the way this code is organized
2356                    */
2357                   if (sk->send_head->link3)
2358                     sk->retransmits = 1;
2359                   else
2360                     sk->retransmits = 0;
2361                 }
2362 
2363                 /*
2364                  * need to restart backoff whenever we get a response,
2365                  * or things get impossible if we lose a window-full of
2366                  * data with very small MSS
2367                  */
2368                 sk->backoff = 0;
2369                 /* We have one less packet out there. */
2370                 if (sk->packets_out > 0) sk->packets_out --;
2371                 DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
2372                                 sk->send_head, sk->send_head->h.seq, ack));
2373 
2374                 /* Wake up the process, it can probably write more. */
2375                 if (!sk->dead) sk->write_space(sk);
2376 
2377                 oskb = sk->send_head;
2378 
2379                 /* 
2380                  * In theory we're supposed to ignore rtt's when there's
2381                  * retransmission in process.  Unfortunately this means
2382                  * that if there's a sharp increase in RTT, we may 
2383                  * never get out of retransmission.  For the moment
2384                  * ignore the test.
2385                  */
2386 
2387                 if (/* sk->retransmits == 0 && */ !(flag&2)) {
2388                   long abserr, rtt = jiffies - oskb->when;
2389 
2390                   /*
2391                    * Berkeley's code puts these limits on a separate timeout
2392                    * field, not on the RTT estimate itself.  However the way this
2393                    * code is done, that would complicate things.  If we're going
2394                    * to clamp the values, we have to do so before calculating
2395                    * the mdev, or we'll get unreasonably large mdev's.  Experience
2396                    * shows that with a minium rtt of .1 sec, we get spurious
2397                    * retransmits, due to delayed acks on some hosts.  Berkeley uses
2398                    * 1 sec, so why not?
2399                    */
2400 
2401                   if (rtt < 100) rtt = 100; /* 1 sec */
2402                   if (rtt > 12000) rtt = 12000; /* 2 min - max rtt allowed by protocol */
2403 
2404                   if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
2405                     /* first ack, so nothing else to average with */
2406                     sk->rtt = rtt;
2407                     sk->mdev = rtt; /* overcautious initial estimate */
2408                   }
2409                   else {
2410                     abserr = (rtt > sk->rtt) ? rtt - sk->rtt : sk->rtt - rtt;
2411                     sk->rtt = (7 * sk->rtt + rtt) >> 3;
2412                     sk->mdev = (3 * sk->mdev + abserr) >> 2;
2413                   }
2414                   sk->backoff = 0;
2415                 }
2416                 flag |= (2|4);
2417 
2418                 cli();
2419 
2420                 oskb = sk->send_head;
2421                 IS_SKB(oskb);
2422                 sk->send_head =(struct sk_buff *)oskb->link3;
2423                 if (sk->send_head == NULL) {
2424                         sk->send_tail = NULL;
2425                 }
2426 
2427                 /* We may need to remove this from the dev send list. */                
2428                 skb_unlink(oskb);       /* Much easier! */
2429                 sti();
2430                 oskb->magic = 0;
2431                 kfree_skb(oskb, FREE_WRITE); /* write. */
2432                 if (!sk->dead) sk->write_space(sk);
2433         } else {
2434                 break;
2435         }
2436   }
2437 
2438   /*
2439    * Maybe we can take some stuff off of the write queue,
2440    * and put it onto the xmit queue.
2441    */
2442   if (sk->wfront != NULL) {
2443         if (after (sk->window_seq+1, sk->wfront->h.seq) &&
2444                 (sk->retransmits == 0 || 
2445                  sk->timeout != TIME_WRITE ||
2446                  before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
2447                 && sk->packets_out < sk->cong_window) {
2448                 flag |= 1;
2449                 tcp_write_xmit(sk);
2450         } else if (before(sk->window_seq, sk->wfront->h.seq) &&
2451                    sk->send_head == NULL &&
2452                    sk->ack_backlog == 0 &&
2453                    sk->state != TCP_TIME_WAIT) {
2454                 reset_timer(sk, TIME_PROBE0, 
2455                             backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
2456         }               
2457   } else {
2458         if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2459             sk->state != TCP_TIME_WAIT && !sk->keepopen) {
2460                 DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n")); 
2461                 if (!sk->dead) sk->write_space(sk);
2462 
2463                 if (sk->keepopen)
2464                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2465                 else
2466                         delete_timer(sk);
2467         } else {
2468                 if (sk->state != (unsigned char) sk->keepopen) {
2469                         reset_timer(sk, TIME_WRITE,
2470                           backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
2471                 }
2472                 if (sk->state == TCP_TIME_WAIT) {
2473                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2474                 }
2475         }
2476   }
2477 
2478   if (sk->packets_out == 0 && sk->send_tmp != NULL &&
2479       sk->wfront == NULL && sk->send_head == NULL) {
2480         flag |= 1;
2481         tcp_send_partial(sk);
2482   }
2483 
2484   /* See if we are done. */
2485   if (sk->state == TCP_TIME_WAIT) {
2486         if (!sk->dead)
2487                 sk->state_change(sk);
2488         if (sk->rcv_ack_seq == sk->send_seq && sk->acked_seq == sk->fin_seq) {
2489                 flag |= 1;
2490                 sk->state = TCP_CLOSE;
2491                 sk->shutdown = SHUTDOWN_MASK;
2492         }
2493   }
2494 
2495   if (sk->state == TCP_LAST_ACK || sk->state == TCP_FIN_WAIT2) {
2496         if (!sk->dead) sk->state_change(sk);
2497         if (sk->rcv_ack_seq == sk->send_seq) {
2498                 flag |= 1;
2499                 if (sk->acked_seq != sk->fin_seq) {
2500                         tcp_time_wait(sk);
2501                 } else {
2502                         DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk));
2503                         tcp_send_ack(sk->send_seq, sk->acked_seq, sk,
2504                                      th, sk->daddr);
2505                         sk->shutdown = SHUTDOWN_MASK;
2506                         sk->state = TCP_CLOSE;
2507                 }
2508         }
2509   }
2510 
2511   if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2512       (sk->send_head->when + backoff(sk->backoff) * (2 * sk->mdev + sk->rtt)
2513        < jiffies)) {
2514         sk->exp_growth = 0;
2515         ip_retransmit(sk, 1);
2516   }
2517 
2518   DPRINTF((DBG_TCP, "leaving tcp_ack\n"));
2519   return(1);
2520 }
2521 
2522 
2523 /*
2524  * This routine handles the data.  If there is room in the buffer,
2525  * it will have already been moved into it.  If there is no
2526  * room, then we will just have to discard the packet.
      *
      * skb   - the received segment; skb->h.th is its TCP header.  On the
      *         two early-return paths below the skb is freed here,
      *         otherwise it is linked into sk->rqueue.
      * sk    - the socket the segment belongs to.
      * saddr - address argument handed on to tcp_send_ack().
      * len   - length of TCP header plus payload; the payload length
      *         (len minus th->doff*4) is stored in skb->len.
      *
      * Side effects: may advance sk->acked_seq, shrink sk->window, set
      * RCV_SHUTDOWN, change sk->state, arm the TIME_WRITE timer and send
      * acks or a reset.  th->ack_seq is overwritten with the sequence
      * number that follows this segment.
      *
      * Returns 0 on every path.
2527  */
2528 static int
2529 tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
2530          unsigned long saddr, unsigned short len)
2531 {
2532   struct sk_buff *skb1, *skb2;
2533   struct tcphdr *th;
2534   int dup_dumped=0;
2535 
2536   th = skb->h.th;
2537   print_th(th);
       /* From here on skb->len counts payload bytes only. */
2538   skb->len = len -(th->doff*4);
2539 
2540   DPRINTF((DBG_TCP, "tcp_data len = %d sk = %X:\n", skb->len, sk));
2541 
2542   sk->bytes_rcv += skb->len;
       /* Nothing to queue: no payload and none of FIN/URG/PSH set. */
2543   if (skb->len == 0 && !th->fin && !th->urg && !th->psh) {
2544         /* Don't want to keep passing ack's back and forth. */
2545         if (!th->ack) tcp_send_ack(sk->send_seq, sk->acked_seq,sk, th, saddr);
2546         kfree_skb(skb, FREE_READ);
2547         return(0);
2548   }
2549 
       /*
        * The receive side is already shut down: answer with a reset,
        * close the socket with EPIPE and drop the segment.
        */
2550   if (sk->shutdown & RCV_SHUTDOWN) {
2551         sk->acked_seq = th->seq + skb->len + th->syn + th->fin;
2552         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2553         sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2554         sk->state = TCP_CLOSE;
2555         sk->err = EPIPE;
2556         sk->shutdown = SHUTDOWN_MASK;
2557         DPRINTF((DBG_TCP, "tcp_data: closing socket - %X\n", sk));
2558         kfree_skb(skb, FREE_READ);
2559         if (!sk->dead) sk->state_change(sk);
2560         return(0);
2561   }
2562 
2563   /*
2564    * Now we have to walk the chain, and figure out where this one
2565    * goes into it.  This is set up so that the last packet we received
2566    * will be the first one we look at, that way if everything comes
2567    * in order, there will be no performance loss, and if they come
2568    * out of order we will be able to fit things in nicely.
2569    */
2570 
2571   /* This should start at the last one, and then go around forwards. */
2572   if (sk->rqueue == NULL) {
2573         DPRINTF((DBG_TCP, "tcp_data: skb = %X:\n", skb));
2574 #ifdef OLDWAY
2575         sk->rqueue = skb;
2576         skb->next = skb;
2577         skb->prev = skb;
2578         skb->list = &sk->rqueue;
2579 #else
2580         skb_queue_head(&sk->rqueue,skb);
2581 #endif          
             /* skb1 == NULL records that nothing is queued ahead of skb. */
2582         skb1= NULL;
2583   } else {
2584         DPRINTF((DBG_TCP, "tcp_data adding to chain sk = %X:\n", sk));
             /*
              * Walk backwards from the tail (sk->rqueue->prev).  The loop has
              * no exit condition of its own; it ends through one of the
              * breaks below.
              */
2585         for(skb1=sk->rqueue->prev; ; skb1 =(struct sk_buff *)skb1->prev) {
2586                 if(sk->debug)
2587                 {
2588                         printk("skb1=%p :", skb1);
2589                         printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
2590                         printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
2591                         printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
2592                                         sk->acked_seq);
2593                 }
2594 #ifdef OLD              
2595                 if (after(th->seq+1, skb1->h.th->seq)) {
2596                         skb->prev = skb1;
2597                         skb->next = skb1->next;
2598                         skb->next->prev = skb;
2599                         skb1->next = skb;
2600                         if (skb1 == sk->rqueue) sk->rqueue = skb;
2601                         break;
2602                 }
2603                 if (skb1->prev == sk->rqueue) {
2604                         skb->next= skb1;
2605                         skb->prev = skb1->prev;
2606                         skb->prev->next = skb;
2607                         skb1->prev = skb;
2608                         skb1 = NULL; /* so we know we might be able
2609                                         to ack stuff. */
2610                         break;
2611                 }
2612 #else
                     /*
                      * Same starting sequence number and at least as much data:
                      * the new segment takes the place of the queued one, which
                      * is unlinked and freed.
                      */
2613                 if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
2614                 {
2615                         skb_append(skb1,skb);
2616                         skb_unlink(skb1);
2617                         kfree_skb(skb1,FREE_READ);
2618                         dup_dumped=1;
2619                         skb1=NULL;
2620                         break;
2621                 }
                     /* The new segment starts at or after skb1: link it behind skb1. */
2622                 if (after(th->seq+1, skb1->h.th->seq))
2623                 {
2624                         skb_append(skb1,skb);
2625                         break;
2626                 }
                     /*
                      * Reached the head without finding an earlier segment: the
                      * new one goes to the front.  Unlike the OLD variant above,
                      * skb1 is left pointing at the previous head here.
                      */
2627                 if (skb1 == sk->rqueue)
2628                 {
2629                         skb_queue_head(&sk->rqueue, skb);               
2630                         break;
2631                 }
2632 #endif          
2633         }
2634         DPRINTF((DBG_TCP, "skb = %X:\n", skb));
2635   }
2636 
       /*
        * Reuse the header's ack_seq field to hold the sequence number
        * that follows this segment (payload plus one each for SYN and FIN).
        */
2637   th->ack_seq = th->seq + skb->len;
2638   if (th->syn) th->ack_seq++;
2639   if (th->fin) th->ack_seq++;
2640 
2641   if (before(sk->acked_seq, sk->copied_seq)) {
2642         printk("*** tcp.c:tcp_data bug acked < copied\n");
2643         sk->acked_seq = sk->copied_seq;
2644   }
2645 
2646   /* Now figure out if we can ack anything. */
       /*
        * The body of this outer test consists solely of the inner
        * before() test, so nothing happens unless th->seq is at or
        * before sk->acked_seq; the first half of the outer condition
        * has no effect of its own.
        */
2647   if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) {
2648       if (before(th->seq, sk->acked_seq+1)) {
2649                 if (after(th->ack_seq, sk->acked_seq))
2650                                         sk->acked_seq = th->ack_seq;
2651                 skb->acked = 1;
2652 
2653                 /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */
2654                 if (skb->h.th->fin) {
2655                         if (!sk->dead) sk->state_change(sk);
2656                         sk->shutdown |= RCV_SHUTDOWN;
2657                 }
2658           
                     /*
                      * Walk forward over the segments queued behind this one and
                      * ack every one that is now contiguous; stop at the first gap
                      * or on reaching the queue head again.
                      *
                      * NOTE(review): sk->window is reduced only for the follow-on
                      * segments handled in this loop, not for skb itself just
                      * above - confirm that this is intended.
                      */
2659                 for(skb2 = (struct sk_buff *)skb->next;
2660                     skb2 !=(struct sk_buff *) sk->rqueue;
2661                     skb2 = (struct sk_buff *)skb2->next) {
2662                         if (before(skb2->h.th->seq, sk->acked_seq+1)) {
2663                                 if (after(skb2->h.th->ack_seq, sk->acked_seq))
2664                                 {
2665                                         long old_acked_seq = sk->acked_seq;
2666                                         sk->acked_seq = skb2->h.th->ack_seq;
2667                                         if((int)(sk->acked_seq - old_acked_seq) >0)
2668                                         {
                                                     /* Shrink the window by the amount just acked, not below zero. */
2669                                                 int new_window=sk->window-sk->acked_seq+
2670                                                         old_acked_seq;
2671                                                 if(new_window<0)
2672                                                         new_window=0;
2673                                                 sk->window = new_window;
2674                                         }
2675                                 }
2676                                 skb2->acked = 1;
2677 
2678                                 /*
2679                                  * When we ack the fin, we turn on
2680                                  * the RCV_SHUTDOWN flag.
2681                                  */
2682                                 if (skb2->h.th->fin) {
2683                                         sk->shutdown |= RCV_SHUTDOWN;
2684                                         if (!sk->dead) sk->state_change(sk);
2685                                 }
2686 
2687                                 /* Force an immediate ack. */
2688                                 sk->ack_backlog = sk->max_ack_backlog;
2689                         } else {
2690                                 break;
2691                         }
2692                 }
2693 
2694                 /*
2695                  * This also takes care of updating the window.
2696                  * This if statement needs to be simplified.
                      *
                      * The "ack now" arm is empty because its tcp_send_ack() call
                      * is commented out; the ack for an acked segment is sent
                      * further down.  The else arm counts a delayed ack and arms
                      * the TIME_WRITE timer with TCP_ACK_TIME.
2697                  */
2698                 if (!sk->delay_acks ||
2699                     sk->ack_backlog >= sk->max_ack_backlog || 
2700                     sk->bytes_rcv > sk->max_unacked || th->fin) {
2701 /*                      tcp_send_ack(sk->send_seq, sk->acked_seq,sk,th, saddr); */
2702                 } else {
2703                         sk->ack_backlog++;
2704                         if(sk->debug)
2705                                 printk("Ack queued.\n");
2706                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2707                 }
2708         }
2709   }
2710 
2711   /*
2712    * If we've missed a packet, send an ack.
2713    * Also start a timer to send another.
        *
        * skb->acked is still clear here when the segment starts beyond
        * sk->acked_seq, i.e. something before it has not arrived yet.
2714    */
2715   if (!skb->acked) {
2716         /*
2717          * This is important.  If we don't have much room left,
2718          * we need to throw out a few packets so we have a good
2719          * window.
              *
              * NOTE(review): skb1 is taken from the front of sk->rqueue.  If
              * that can be the skb queued above (it is unacked on this path),
              * it is freed here while th, taken from skb->h.th, is still
              * passed to tcp_send_ack() below - TODO confirm.
2720          */
2721         while (sk->prot->rspace(sk) < sk->mtu) {
2722                 skb1 = skb_peek(&sk->rqueue);
2723                 if (skb1 == NULL) {
2724                         printk("INET: tcp.c:tcp_data memory leak detected.\n");
2725                         break;
2726                 }
2727 
2728                 /* Don't throw out something that has been acked. */
2729                 if (skb1->acked) {
2730                         break;
2731                 }
2732                 
2733                 skb_unlink(skb1);
2734 #ifdef OLDWAY           
2735                 if (skb1->prev == skb1) {
2736                         sk->rqueue = NULL;
2737                 } else {
2738                         sk->rqueue = (struct sk_buff *)skb1->prev;
2739                         skb1->next->prev = skb1->prev;
2740                         skb1->prev->next = skb1->next;
2741                 }
2742 #endif          
2743                 kfree_skb(skb1, FREE_READ);
2744         }
2745         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
2746         sk->ack_backlog++;
2747         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2748   } else {
2749         /* This segment was marked acked above; send the ack for it now. */
2750         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
2751   }
2752 
2753   /* Now tell the user we may have some data. */
2754   if (!sk->dead) {
2755         if(sk->debug)
2756                 printk("Data wakeup.\n");
2757         sk->data_ready(sk,0);
2758   } else {
2759         DPRINTF((DBG_TCP, "data received on dead socket.\n"));
2760   }
2761 
       /*
        * In FIN_WAIT2, once the peer's FIN has been acked
        * (acked_seq == fin_seq) and everything we sent has been acked
        * (rcv_ack_seq == send_seq), move on to LAST_ACK.
        */
2762   if (sk->state == TCP_FIN_WAIT2 &&
2763       sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->send_seq) {
2764         DPRINTF((DBG_TCP, "tcp_data: entering last_ack state sk = %X\n", sk));
2765 
2766 /*      tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr); */
2767         sk->shutdown = SHUTDOWN_MASK;
2768         sk->state = TCP_LAST_ACK;
2769         if (!sk->dead) sk->state_change(sk);
2770   }
2771 
2772   return(0);
2773 }
2774 
2775 
2776 static int
2777 tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
2778 {
       /*
        * Handle a segment that has the URG bit set.
        *
        * sk    - socket the segment arrived on.
        * th    - TCP header of the segment; its urg/psh bits are rewritten
        *         when the socket takes urgent data inline.
        * saddr - not used in this function.
        *
        * Wakes the reader, then either converts the segment to ordinary
        * pushed data (sk->urginline) or raises SIGURG and counts the
        * urgent segment in sk->urg.
        *
        * Returns 0 on every path.
        */
2779   extern int kill_pg(int pg, int sig, int priv);
2780   extern int kill_proc(int pid, int sig, int priv);
2781     
2782   if (!sk->dead) 
2783         sk->data_ready(sk,0);
2784     
       /* Inline mode: clear URG and set PSH instead, with no signal. */
2785   if (sk->urginline) {
2786         th->urg = 0;
2787         th->psh = 1;
2788         return(0);
2789   }
2790 
       /* Signal only while sk->urg is zero, i.e. no urgent segment is counted yet. */
2791   if (!sk->urg) {
2792         /* So if we get more urgent data, we don't signal the user again. */
2793         if (sk->proc != 0) {
                     /* A positive sk->proc goes to kill_proc(), a negative one to kill_pg() with the sign dropped. */
2794                 if (sk->proc > 0) {
2795                         kill_proc(sk->proc, SIGURG, 1);
2796                 } else {
2797                         kill_pg(-sk->proc, SIGURG, 1);
2798                 }
2799         }
2800   }
2801   sk->urg++;
2802   return(0);
2803 }
2804 
2805 
2806 /* This deals with incoming fins. 'Linus at 9 O'clock' 8-)
      *
      * sk    - socket the FIN arrived on.
      * th    - TCP header of the segment carrying the FIN.
      * saddr - used only in the debug trace.
      * dev   - used only in the debug trace.
      *
      * Notifies the socket owner through sk->state_change() (unless the
      * socket is dead) and then moves sk->state:
      *
      *   SYN_RECV, SYN_SENT, ESTABLISHED -> CLOSE_WAIT
      *   FIN_WAIT1                       -> FIN_WAIT2
      *   CLOSE_WAIT, FIN_WAIT2           -> unchanged
      *   anything else (incl. TIME_WAIT) -> LAST_ACK, TIME_CLOSE timer armed
      *
      * The first two rows also record sk->fin_seq = th->seq + 1.  Every
      * row except the last bumps sk->ack_backlog.
      *
      * Returns 0 on every path.
      */
2807 static int
2808 tcp_fin(struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
2809          unsigned long saddr, struct device *dev)
2810 {
2811   DPRINTF((DBG_TCP, "tcp_fin(sk=%X, th=%X, saddr=%X, dev=%X)\n",
2812                                                 sk, th, saddr, dev));
2813   
2814   if (!sk->dead) {
2815         sk->state_change(sk);
2816   }
2817 
2818   switch(sk->state) {
2819         case TCP_SYN_RECV:
2820         case TCP_SYN_SENT:
2821         case TCP_ESTABLISHED:
2822                 /* Contains the one that needs to be acked */
2823                 sk->fin_seq = th->seq+1;
2824                 sk->state = TCP_CLOSE_WAIT;
                     /* A FIN that also carries RST shuts down both directions. */
2825                 if (th->rst) sk->shutdown = SHUTDOWN_MASK;
2826                 break;
2827 
2828         case TCP_CLOSE_WAIT:
2829         case TCP_FIN_WAIT2:
2830                 break; /* we got a retransmit of the fin. */
2831 
2832         case TCP_FIN_WAIT1:
2833                 /* Contains the one that needs to be acked */
2834                 sk->fin_seq = th->seq+1;
2835                 sk->state = TCP_FIN_WAIT2;
2836                 break;
2837 
             /* default is listed first; it shares the TIME_WAIT handling below. */
2838         default:
2839         case TCP_TIME_WAIT:
2840                 sk->state = TCP_LAST_ACK;
2841 
2842                 /* Start the timers. */
2843                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
                     /* Returns here, so sk->ack_backlog is not incremented on this path. */
2844                 return(0);
2845   }
2846   sk->ack_backlog++;
2847 
2848   return(0);
2849 }
2850 
2851 
2852 /* This will accept the next outstanding connection.
      *
      * sk    - a socket that must be in TCP_LISTEN state.
      * flags - if O_NONBLOCK is set the call does not sleep.
      *
      * Takes the next skb from get_firstr(sk) and returns the sock stored
      * in its skb->sk; the skb itself is freed and sk->ack_backlog is
      * decremented.
      *
      * Returns the new sock, or NULL with sk->err set to EINVAL (not
      * listening), EAGAIN (nothing pending, non-blocking) or ERESTARTSYS
      * (a non-blocked signal is pending after the sleep).
      */
2853 static struct sock *
2854 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
2855 {
2856   struct sock *newsk;
2857   struct sk_buff *skb;
2858   
2859   DPRINTF((DBG_TCP, "tcp_accept(sk=%X, flags=%X, addr=%s)\n",
2860                                 sk, flags, in_ntoa(sk->saddr)));
2861 
2862   /*
2863    * We need to make sure that this socket is listening,
2864    * and that it has something pending.
2865    */
2866   if (sk->state != TCP_LISTEN) {
2867         sk->err = EINVAL;
2868         return(NULL); 
2869   }
2870 
2871   /* avoid the race. */
       /* sk->inuse is set and the queue is checked inside the cli()/sti() pair. */
2872   cli();
2873   sk->inuse = 1;
2874   while((skb = get_firstr(sk)) == NULL) {
2875         if (flags & O_NONBLOCK) {
2876                 sti();
2877                 release_sock(sk);
2878                 sk->err = EAGAIN;
2879                 return(NULL);
2880         }
2881 
             /* Release the socket across the sleep. */
2882         release_sock(sk);
2883         interruptible_sleep_on(sk->sleep);
2884         if (current->signal & ~current->blocked) {
                     /*
                      * No release_sock() on this path: the socket was released
                      * before the sleep and sk->inuse has not been set again.
                      */
2885                 sti();
2886                 sk->err = ERESTARTSYS;
2887                 return(NULL);
2888         }
2889         sk->inuse = 1;
2890   }
2891   sti();
2892 
2893   /* Now all we need to do is return skb->sk. */
2894   newsk = skb->sk;
2895 
2896   kfree_skb(skb, FREE_READ);
2897   sk->ack_backlog--;
2898   release_sock(sk);
2899   return(newsk);
2900 }
2901 
2902 
2903 /* This will initiate an outgoing connection.
      *
      * sk       - socket to connect; must be in TCP_CLOSE state.
      * usin     - user-space address of the destination sockaddr_in.
      * addr_len - size of the user buffer at usin; must be at least 8.
      *
      * Copies the destination in from user space, picks the initial
      * sequence number, builds a SYN that carries a 4-byte option holding
      * sk->mtu, moves the socket to TCP_SYN_SENT, arms the TIME_WRITE
      * timer and hands the packet to sk->prot->queue_xmit().
      *
      * Returns 0 once the SYN has been queued, or a negative errno:
      * -EISCONN, -EINVAL, the verify_area() error, -EAFNOSUPPORT,
      * -ENETUNREACH (broadcast destination or build_header failure) or
      * -ENOMEM.
      */
2904 static int
2905 tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
2906 {
2907   struct sk_buff *buff;
2908   struct sockaddr_in sin;
2909   struct device *dev=NULL;
2910   unsigned char *ptr;
2911   int tmp;
2912   struct tcphdr *t1;
2913   int err;
2914 
2915   if (sk->state != TCP_CLOSE) return(-EISCONN);
2916   if (addr_len < 8) return(-EINVAL);
2917 
2918   err=verify_area(VERIFY_READ, usin, addr_len);
2919   if(err)
2920         return err;
2921         
       /* Copy no more than the local structure can hold. */
2922   memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len));
2923 
       /* A zero sin_family is let through as well as AF_INET. */
2924   if (sin.sin_family && sin.sin_family != AF_INET) return(-EAFNOSUPPORT);
2925 
2926   DPRINTF((DBG_TCP, "TCP connect daddr=%s\n", in_ntoa(sin.sin_addr.s_addr)));
2927   
2928   /* Don't want a TCP connection going to a broadcast address */
2929   if (chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) { 
2930         DPRINTF((DBG_TCP, "TCP connection to broadcast address not allowed\n"));
2931         return(-ENETUNREACH);
2932   }
2933   
2934   sk->inuse = 1;
2935   sk->daddr = sin.sin_addr.s_addr;
2936   sk->send_seq = jiffies * SEQ_TICK - seq_offset;
2937   sk->rcv_ack_seq = sk->send_seq -1;
2938   sk->err = 0;
2939   sk->dummy_th.dest = sin.sin_port;
       /*
        * The socket is released around the allocation and taken again
        * afterwards - presumably because a GFP_KERNEL allocation may
        * sleep; TODO confirm.  If the allocation fails, the fields set
        * just above stay modified.
        */
2940   release_sock(sk);
2941 
2942   buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
2943   if (buff == NULL) {
2944         return(-ENOMEM);
2945   }
2946   sk->inuse = 1;
2947   buff->mem_addr = buff;
2948   buff->mem_len = MAX_SYN_SIZE;
       /* 24 matches doff = 6 set below: six 32-bit words of TCP header including the 4 option bytes. */
2949   buff->len = 24;
2950   buff->sk = sk;
2951   buff->free = 1;
2952   t1 = (struct tcphdr *) buff->data;
2953 
2954   /* Put in the IP header and routing stuff. */
2955   /* We need to build the routing stuff from the things saved in skb. */
2956   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
2957                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2958   if (tmp < 0) {
2959         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
2960         release_sock(sk);
2961         return(-ENETUNREACH);
2962   }
       /* tmp is the number of bytes build_header() put in front; the TCP header starts after them. */
2963   buff->len += tmp;
2964   t1 = (struct tcphdr *)((char *)t1 +tmp);
2965 
2966   memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
       /*
        * The SYN uses up one sequence number: the header carries the old
        * send_seq and buff->h.seq the incremented one.
        * NOTE(review): ntohl() is used for a host-to-network conversion
        * where htonl() would be the usual spelling - confirm they are
        * interchangeable on the supported targets.
        */
2967   t1->seq = ntohl(sk->send_seq++);
2968   buff->h.seq = sk->send_seq;
2969   t1->ack = 0;
       /* NOTE(review): the window is stored without a byte-order conversion - confirm this is intended. */
2970   t1->window = 2;
2971   t1->res1=0;
2972   t1->res2=0;
2973   t1->rst = 0;
2974   t1->urg = 0;
2975   t1->psh = 0;
2976   t1->syn = 1;
2977   t1->urg_ptr = 0;
2978   t1->doff = 6;
2979 
2980 /* use the size the user asked for (sk->mss), else 576 less HEADER_SIZE */
2981   if (sk->mss)
2982     sk->mtu = sk->mss;
2983   else
2984     sk->mtu = 576 - HEADER_SIZE;
2985 /* but not bigger than device MTU */
2986   sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
2987 
2988   /* Put in the TCP options to say MTU. */
       /* Option bytes: kind 2, length 4, then sk->mtu with the high byte first. */
2989   ptr = (unsigned char *)(t1+1);
2990   ptr[0] = 2;
2991   ptr[1] = 4;
2992   ptr[2] = (sk->mtu) >> 8;
2993   ptr[3] = (sk->mtu) & 0xff;
2994   tcp_send_check(t1, sk->saddr, sk->daddr,
2995                   sizeof(struct tcphdr) + 4, sk);
2996 
2997   /* This must go first otherwise a really quick response will get reset. */
2998   sk->state = TCP_SYN_SENT;
2999   sk->rtt = TCP_CONNECT_TIME;
3000   reset_timer(sk, TIME_WRITE, TCP_CONNECT_TIME);        /* Timer for repeating the SYN until an answer */
       /* presumably pre-loads the counter so that only TCP_SYN_RETRIES tries remain - TODO confirm */
3001   sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3002 
3003   sk->prot->queue_xmit(sk, dev, buff, 0);  
3004   
3005   release_sock(sk);
3006   return(0);
3007 }
3008 
3009 
3010 /* This function checks to see if the tcp header is actually acceptable.
      *
      * sk    - socket the segment was matched to.
      * th    - TCP header of the segment.
      * len   - length of TCP header plus payload; the payload length is
      *         taken as len minus th->doff*4.
      * opt   - used only in the debug trace.
      * saddr - address argument handed on to tcp_send_ack().
      * dev   - device passed to tcp_reset().
      *
      * Returns 1 if the segment should be processed and 0 if it was
      * rejected.  A rejected segment is answered with an ack unless it
      * carries RST.
      *
      * NOTE(review): in SYN_SENT and SYN_RECV a segment that fails the
      * window test is answered with a reset and 1 is still returned -
      * confirm callers expect to go on processing it.
      */
3011 static int
3012 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3013              struct options *opt, unsigned long saddr, struct device *dev)
3014 {
3015   /*
3016    * This isn't quite right.  sk->acked_seq could be more recent
3017    * than sk->window.  This is however close enough.  We will accept
3018    * slightly more packets than we should, but it should not cause
3019    * problems unless someone is trying to forge packets.
3020    */
3021   DPRINTF((DBG_TCP, "tcp_sequence(sk=%X, th=%X, len = %d, opt=%d, saddr=%X)\n",
3022           sk, th, len, opt, saddr));
3023 
       /*
        * Accept when any of the following holds (window = the range from
        * acked_seq to acked_seq + window):
        *   1. the segment starts inside the window;
        *   2. the segment ends inside the window (from acked_seq + 1);
        *   3. it starts before acked_seq and ends beyond the window.
        */
3024   if (between(th->seq, sk->acked_seq, sk->acked_seq + sk->window)||
3025       between(th->seq + len-(th->doff*4), sk->acked_seq + 1,
3026               sk->acked_seq + sk->window) ||
3027      (before(th->seq, sk->acked_seq) &&
3028        after(th->seq + len -(th->doff*4), sk->acked_seq + sk->window))) {
3029        return(1);
3030    }
3031   DPRINTF((DBG_TCP, "tcp_sequence: rejecting packet.\n"));
3032 
3033   /*
3034    *    Send a reset if we get something not ours and we are
3035    *    unsynchronized. Note: We don't do anything to our end. We
3036    *    are just killing the bogus remote connection then we will
3037    *    connect again and it will work (with luck).
3038    */
3039          
3040   if(sk->state==TCP_SYN_SENT||sk->state==TCP_SYN_RECV)
3041   {
3042         tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3043         return(1);
3044   }
3045 
3046   /*
3047    * If it's too far ahead, send an ack to let the
3048    * other end know what we expect.
3049    */
3050   if (after(th->seq, sk->acked_seq + sk->window)) {
3051         if(!th->rst)
3052                 tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
3053         return(0);
3054   }
3055 
3056 #ifdef undef
3057 /*
3058  * if we do this, we won't respond to keepalive packets, since those
3059  * are slightly out of window, and we have to generate an ack
3060  * a late ack out still not to have a sequence number less than
3061  * one we've seen before.  Berkeley doesn't seem to do this, but it's
3062  * always hard to be sure.
3063  */
3064   /* In case it's just a late ack, let it through. */
3065   if (th->ack && len == (th->doff * 4) &&
3066       after(th->seq, sk->acked_seq - 32767) &&
3067       !th->fin && !th->syn) return(1);
3068 #endif
3069 
       /*
        * With the block above compiled out, this tail does exactly what
        * the "too far ahead" branch does: ack unless RST, then reject.
        */
3070   if (!th->rst) {
3071         /* Try to resync things. */
3072         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
3073   }
3074   return(0);
3075 }
3076 
3077 
3078 
3079 
3080 
3081 int
3082 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3083         unsigned long daddr, unsigned short len,
3084         unsigned long saddr, int redo, struct inet_protocol * protocol)
3085 {
3086   struct tcphdr *th;
3087   struct sock *sk;
3088 
3089   if (!skb) {
3090         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL\n"));
3091         return(0);
3092   }
3093 #if 0   /* FIXME: it's ok for protocol to be NULL */
3094   if (!protocol) {
3095         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv protocol = NULL\n"));
3096         return(0);
3097   }
3098 
3099   if (!opt) {   /* FIXME: it's ok for opt to be NULL */
3100         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv opt = NULL\n"));
3101   }
3102 #endif
3103   if (!dev) {
3104         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL\n"));
3105         return(0);
3106   }
3107   th = skb->h.th;
3108 
3109   /* Find the socket. */
3110   sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3111   DPRINTF((DBG_TCP, "<<\n"));
3112   DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X\n", len, redo, skb));
3113   
3114   /* If this socket has got a reset its to all intents and purposes 
3115      really dead */
3116   if (sk!=NULL && sk->zapped)
3117         sk=NULL;
3118 
3119   if (sk) {
3120          DPRINTF((DBG_TCP, "sk = %X:\n", sk));
3121   }
3122 
3123   if (!redo) {
3124         if (tcp_check(th, len, saddr, daddr )) {
3125                 skb->sk = NULL;
3126                 DPRINTF((DBG_TCP, "packet dropped with bad checksum.\n"));
3127 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: bad checksum\n");
3128                 kfree_skb(skb,FREE_READ);
3129                 /*
3130                  * We don't release the socket because it was
3131                  * never marked in use.
3132                  */
3133                 return(0);
3134         }
3135 
3136         /* See if we know about the socket. */
3137         if (sk == NULL) {
3138                 if (!th->rst) 
3139                 {       
3140                         th->seq = ntohl(th->seq);
3141                         /* So reset is always called with th->seq in host order */
3142                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3143                 }
3144                 skb->sk = NULL;
3145                 kfree_skb(skb, FREE_READ);
3146                 return(0);
3147         }
3148 
3149         skb->len = len;
3150         skb->sk = sk;
3151         skb->acked = 0;
3152         skb->used = 0;
3153         skb->free = 0;
3154         skb->urg_used = 0;
3155         skb->saddr = daddr;
3156         skb->daddr = saddr;
3157 
3158         th->seq = ntohl(th->seq);
3159 
3160        /* We may need to add it to the backlog here. */
3161        cli();
3162        if (sk->inuse) {
3163                 if (sk->back_log == NULL) {
3164                         sk->back_log = skb;
3165                         skb->next = skb;
3166                         skb->prev = skb;
3167                 } else {
3168                         skb->next = sk->back_log;
3169                         skb->prev = sk->back_log->prev;
3170                         skb->prev->next = skb;
3171                         skb->next->prev = skb;
3172                 }
3173                 sti();
3174                 return(0);
3175         }
3176         sk->inuse = 1;
3177         sti();
3178   } else {
3179         if (!sk) {
3180                 DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1\n"));
3181                 return(0);
3182         }
3183   }
3184 
3185   if (!sk->prot) {
3186         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL \n"));
3187         return(0);
3188   }
3189 
3190   /* Charge the memory to the socket. */
3191   if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
3192         skb->sk = NULL;
3193         DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.\n"));
3194         kfree_skb(skb, FREE_READ);
3195         release_sock(sk);
3196         return(0);
3197   }
3198   sk->rmem_alloc += skb->mem_len;
3199 
3200   DPRINTF((DBG_TCP, "About to do switch.\n"));
3201 
3202   /* Now deal with it. */
3203   switch(sk->state) {
3204         /*
3205          * This should close the system down if it's waiting
3206          * for an ack that is never going to be sent.
3207          */
3208         case TCP_LAST_ACK:
3209                 if (th->rst) {
3210                         sk->zapped=1;
3211                         sk->err = ECONNRESET;
3212                         sk->state = TCP_CLOSE;
3213                         sk->shutdown = SHUTDOWN_MASK;
3214                         if (!sk->dead) {
3215                                 sk->state_change(sk);
3216                         }
3217                         kfree_skb(skb, FREE_READ);
3218                         release_sock(sk);
3219                         return(0);
3220                 }
3221 
3222         case TCP_ESTABLISHED:
3223         case TCP_CLOSE_WAIT:
3224         case TCP_FIN_WAIT1:
3225         case TCP_FIN_WAIT2:
3226         case TCP_TIME_WAIT:
3227                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3228 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: not in seq\n");
3229 #ifdef undef
3230 /* nice idea, but tcp_sequence already does this.  Maybe it shouldn't?? */
3231                         if(!th->rst)
3232                                 tcp_send_ack(sk->send_seq, sk->acked_seq, 
3233                                      sk, th, saddr);
3234 #endif
3235                         kfree_skb(skb, FREE_READ);
3236                         release_sock(sk);
3237                         return(0);
3238                 }
3239 
3240                 if (th->rst) {
3241                         sk->zapped=1;
3242                         /* This means the thing should really be closed. */
3243                         sk->err = ECONNRESET;
3244 
3245                         if (sk->state == TCP_CLOSE_WAIT) {
3246                                 sk->err = EPIPE;
3247                         }
3248 
3249                         /*
3250                          * A reset with a fin just means that
3251                          * the data was not all read.
3252                          */
3253                         sk->state = TCP_CLOSE;
3254                         sk->shutdown = SHUTDOWN_MASK;
3255                         if (!sk->dead) {
3256                                 sk->state_change(sk);
3257                         }
3258                         kfree_skb(skb, FREE_READ);
3259                         release_sock(sk);
3260                         return(0);
3261                 }
3262                 if (
3263 #if 0
3264                 if ((opt && (opt->security != 0 ||
3265                             opt->compartment != 0)) || 
3266 #endif
3267                                  th->syn) {
3268                         sk->err = ECONNRESET;
3269                         sk->state = TCP_CLOSE;
3270                         sk->shutdown = SHUTDOWN_MASK;
3271                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3272                         if (!sk->dead) {
3273                                 sk->state_change(sk);
3274                         }
3275                         kfree_skb(skb, FREE_READ);
3276                         release_sock(sk);
3277                         return(0);
3278                 }
3279                 if (th->ack) {
3280                         if (!tcp_ack(sk, th, saddr, len)) {
3281                                 kfree_skb(skb, FREE_READ);
3282                                 release_sock(sk);
3283                                 return(0);
3284                         }
3285                 }
3286                 if (th->urg) {
3287                         if (tcp_urg(sk, th, saddr)) {
3288                                 kfree_skb(skb, FREE_READ);
3289                                 release_sock(sk);
3290                                 return(0);
3291                         }
3292                 }
3293 
3294                 if (tcp_data(skb, sk, saddr, len)) {
3295                         kfree_skb(skb, FREE_READ);
3296                         release_sock(sk);
3297                         return(0);
3298                 }
3299 
3300                 /* Moved: you must do data then fin bit */
3301                 if (th->fin && tcp_fin(sk, th, saddr, dev)) {
3302                         kfree_skb(skb, FREE_READ);
3303                         release_sock(sk);
3304                         return(0);
3305                 }
3306 
3307                 release_sock(sk);
3308                 return(0);
3309 
3310         case TCP_CLOSE:
3311                 if (sk->dead || sk->daddr) {
3312                         DPRINTF((DBG_TCP, "packet received for closed,dead socket\n"));
3313                         kfree_skb(skb, FREE_READ);
3314                         release_sock(sk);
3315                         return(0);
3316                 }
3317 
3318                 if (!th->rst) {
3319                         if (!th->ack)
3320                                 th->ack_seq = 0;
3321                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3322                 }
3323                 kfree_skb(skb, FREE_READ);
3324                 release_sock(sk);
3325                 return(0);
3326 
3327         case TCP_LISTEN:
3328                 if (th->rst) {
3329                         kfree_skb(skb, FREE_READ);
3330                         release_sock(sk);
3331                         return(0);
3332                 }
3333                 if (th->ack) {
3334                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3335                         kfree_skb(skb, FREE_READ);
3336                         release_sock(sk);
3337                         return(0);
3338                 }
3339 
3340                 if (th->syn) {
3341 #if 0
3342                         if (opt->security != 0 || opt->compartment != 0) {
3343                                 tcp_reset(daddr, saddr, th, prot, opt,dev);
3344                                 release_sock(sk);
3345                                 return(0);
3346                         }
3347 #endif
3348 
3349                         /*
3350                          * Now we just put the whole thing including
3351                          * the header and saddr, and protocol pointer
3352                          * into the buffer.  We can't respond until the
3353                          * user tells us to accept the connection.
3354                          */
3355                         tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
3356                         release_sock(sk);
3357                         return(0);
3358                 }
3359 
3360                 kfree_skb(skb, FREE_READ);
3361                 release_sock(sk);
3362                 return(0);
3363 
3364         default:
3365                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3366                         kfree_skb(skb, FREE_READ);
3367                         release_sock(sk);
3368                         return(0);
3369                 }
3370 
3371         case TCP_SYN_SENT:
3372                 if (th->rst) {
3373                         sk->err = ECONNREFUSED;
3374                         sk->state = TCP_CLOSE;
3375                         sk->shutdown = SHUTDOWN_MASK;
3376                         sk->zapped = 1;
3377                         if (!sk->dead) {
3378                                 sk->state_change(sk);
3379                         }
3380                         kfree_skb(skb, FREE_READ);
3381                         release_sock(sk);
3382                         return(0);
3383                 }
3384 #if 0
3385                 if (opt->security != 0 || opt->compartment != 0) {
3386                         sk->err = ECONNRESET;
3387                         sk->state = TCP_CLOSE;
3388                         sk->shutdown = SHUTDOWN_MASK;
3389                         tcp_reset(daddr, saddr,  th, sk->prot, opt, dev);
3390                         if (!sk->dead) {
3391                                 wake_up(sk->sleep);
3392                         }
3393                         kfree_skb(skb, FREE_READ);
3394                         release_sock(sk);
3395                         return(0);
3396                 }
3397 #endif
3398                 if (!th->ack) {
3399                         if (th->syn) {
3400                                 sk->state = TCP_SYN_RECV;
3401                         }
3402 
3403                         kfree_skb(skb, FREE_READ);
3404                         release_sock(sk);
3405                         return(0);
3406                 }
3407 
3408                 switch(sk->state) {
3409                         case TCP_SYN_SENT:
3410                                 if (!tcp_ack(sk, th, saddr, len)) {
3411                                         tcp_reset(daddr, saddr, th,
3412                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3413                                         kfree_skb(skb, FREE_READ);
3414                                         release_sock(sk);
3415                                         return(0);
3416                                 }
3417 
3418                                 /*
3419                                  * If the syn bit is also set, switch to
3420                                  * tcp_syn_recv, and then to established.
3421                                  */
3422                                 if (!th->syn) {
3423                                         kfree_skb(skb, FREE_READ);
3424                                         release_sock(sk);
3425                                         return(0);
3426                                 }
3427 
3428                                 /* Ack the syn and fall through. */
3429                                 sk->acked_seq = th->seq+1;
3430                                 sk->fin_seq = th->seq;
3431                                 tcp_send_ack(sk->send_seq, th->seq+1,
3432                                                         sk, th, sk->daddr);
3433         
3434                         case TCP_SYN_RECV:
3435                                 if (!tcp_ack(sk, th, saddr, len)) {
3436                                         tcp_reset(daddr, saddr, th,
3437                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
3438                                         kfree_skb(skb, FREE_READ);
3439                                         release_sock(sk);
3440                                         return(0);
3441                                 }
3442                                 sk->state = TCP_ESTABLISHED;
3443 
3444                                 /*
3445                                  * Now we need to finish filling out
3446                                  * some of the tcp header.
3447                                  */
3448                                 /* We need to check for mtu info. */
3449                                 tcp_options(sk, th);
3450                                 sk->dummy_th.dest = th->source;
3451                                 sk->copied_seq = sk->acked_seq-1;
3452                                 if (!sk->dead) {
3453                                         sk->state_change(sk);
3454                                 }
3455 
3456                                 /*
3457                                  * Now process the rest like we were
3458                                  * already in the established state.
3459                                  */
3460                                 if (th->urg) {
3461                                         if (tcp_urg(sk, th, saddr)) { 
3462                                                 kfree_skb(skb, FREE_READ);
3463                                                 release_sock(sk);
3464                                                 return(0);
3465                                         }
3466                         }
3467                         if (tcp_data(skb, sk, saddr, len))
3468                                                 kfree_skb(skb, FREE_READ);
3469 
3470                         if (th->fin) tcp_fin(sk, th, saddr, dev);
3471                         release_sock(sk);
3472                         return(0);
3473                 }
3474 
3475                 if (th->urg) {
3476                         if (tcp_urg(sk, th, saddr)) {
3477                                 kfree_skb(skb, FREE_READ);
3478                                 release_sock(sk);
3479                                 return(0);
3480                         }
3481                 }
3482 
3483                 if (tcp_data(skb, sk, saddr, len)) {
3484                         kfree_skb(skb, FREE_READ);
3485                         release_sock(sk);
3486                         return(0);
3487                 }
3488 
3489                 if (!th->fin) {
3490                         release_sock(sk);
3491                         return(0);
3492                 }
3493                 tcp_fin(sk, th, saddr, dev);
3494                 release_sock(sk);
3495                 return(0);
3496         }
3497 }
3498 
3499 
3500 /*
3501   * This routine sends a packet with an out of date sequence
3502   * number. It assumes the other end will try to ack it.
3503   */
3504 static void
3505 tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3506 {
3507   struct sk_buff *buff;
3508   struct tcphdr *t1;
3509   struct device *dev=NULL;
3510   int tmp;
3511 
3512   if (sk->zapped)
3513         return; /* Afer a valid reset we can send no more */
3514 
3515   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) return;
3516 
3517   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
3518   if (buff == NULL) return;
3519 
3520   buff->mem_addr = buff;
3521   buff->mem_len = MAX_ACK_SIZE;
3522   buff->len = sizeof(struct tcphdr);
3523   buff->free = 1;
3524   buff->sk = sk;
3525   DPRINTF((DBG_TCP, "in tcp_write_wakeup\n"));
3526   t1 = (struct tcphdr *) buff->data;
3527 
3528   /* Put in the IP header and routing stuff. */
3529   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3530                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
3531   if (tmp < 0) {
3532         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3533         return;
3534   }
3535 
3536   buff->len += tmp;
3537   t1 = (struct tcphdr *)((char *)t1 +tmp);
3538 
3539   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
3540 
3541   /*
3542    * Use a previous sequence.
3543    * This should cause the other end to send an ack.
3544    */
3545   t1->seq = ntohl(sk->send_seq-1);
3546   t1->ack = 1; 
3547   t1->res1= 0;
3548   t1->res2= 0;
3549   t1->rst = 0;
3550   t1->urg = 0;
3551   t1->psh = 0;
3552   t1->fin = 0;
3553   t1->syn = 0;
3554   t1->ack_seq = ntohl(sk->acked_seq);
3555   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3556   t1->doff = sizeof(*t1)/4;
3557   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
3558 
3559   /* Send it and free it.
3560    * This will prevent the timer from automatically being restarted.
3561   */
3562   sk->prot->queue_xmit(sk, dev, buff, 1);
3563 }
3564 
3565 /*
3566  * This routine probes a zero window.  It makes a copy of the first
3567  * packet in the write queue, but with just one byte of data.
3568  */
3569 void
3570 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3571 {
3572   unsigned char *raw;
3573   struct iphdr *iph;
3574   struct sk_buff *skb2, *skb;
3575   int len, hlen, data;
3576   struct tcphdr *t1;
3577   struct device *dev;
3578 
3579   if (sk->zapped)
3580         return; /* Afer a valid reset we can send no more */
3581 
3582   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT &&
3583       sk -> state != TCP_FIN_WAIT1 && sk->state != TCP_FIN_WAIT2)
3584         return;
3585 
3586   skb = sk->wfront;
3587   if (skb == NULL)
3588         return;
3589 
3590   dev = skb->dev;
3591   /* I know this can't happen but as it does.. */
3592   if(dev==NULL)
3593     {
3594       printk("tcp_send_probe0: NULL device bug!\n");
3595       return;
3596     }
3597   IS_SKB(skb);
3598 
3599   raw = skb->data;
3600   iph = (struct iphdr *) (raw + dev->hard_header_len);
3601 
3602   hlen = (iph->ihl * sizeof(unsigned long)) + dev->hard_header_len;
3603   data = skb->len - hlen - sizeof(struct tcphdr);
3604   len = hlen + sizeof(struct tcphdr) + (data ? 1 : 0);
3605         
3606   /* Allocate buffer. */
3607   if ((skb2 = alloc_skb(sizeof(struct sk_buff) + len,GFP_KERNEL)) == NULL) {
3608 /*    printk("alloc failed raw %x th %x hlen %d data %d len %d\n",
3609            raw, skb->h.th, hlen, data, len); */
3610     reset_timer (sk, TIME_PROBE0, 10);  /* try again real soon */
3611     return;
3612   }
3613 
3614   skb2->arp = skb->arp;
3615   skb2->len = len;
3616   skb2->h.raw = (char *)(skb2->data);
3617  
3618   sk->wmem_alloc += skb2->mem_len;
3619  
3620   /* Copy the packet header into the new buffer. */
3621   memcpy(skb2->h.raw, raw, len);
3622  
3623   skb2->h.raw += hlen;  /* it's now h.th -- pointer to the tcp header */
3624   t1 = skb2->h.th;
3625  
3626 /* source, dest, seq, from existing packet */
3627   t1->ack_seq = ntohl(sk->acked_seq);
3628   t1->res1 = 0;
3629 /* doff, fin, from existing packet.  Fin is safe because Linux always
3630  * sends fin in a separate packet
3631  * syn, rst, had better be zero in original */
3632   t1->ack = 1;
3633   t1->urg = 0;  /* urgent pointer might be beyond this fragment */
3634   t1->res2 = 0;
3635   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3636   t1->urg_ptr = 0;
3637   tcp_send_check(t1, sk->saddr, sk->daddr, len - hlen, sk);
3638   /* Send it and free it.
3639    * This will prevent the timer from automatically being restarted.
3640    */
3641   sk->prot->queue_xmit(sk, dev, skb2, 1);
3642   sk->backoff++;
3643   reset_timer (sk, TIME_PROBE0, 
3644                backoff (sk->backoff) * (2 * sk->mdev + sk->rtt));
3645   sk->retransmits++;
3646   sk->prot->retransmits ++;
3647 }
3648 
3649 /*
3650  *      Socket option code for TCP. 
3651  */
3652   
3653 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3654 {
3655         int val,err;
3656 
3657         if(level!=SOL_TCP)
3658                 return ip_setsockopt(sk,level,optname,optval,optlen);
3659 
3660         if (optval == NULL) 
3661                 return(-EINVAL);
3662 
3663         err=verify_area(VERIFY_READ, optval, sizeof(int));
3664         if(err)
3665                 return err;
3666         
3667         val = get_fs_long((unsigned long *)optval);
3668 
3669         switch(optname)
3670         {
3671                 case TCP_MAXSEG:
3672                         if(val<200||val>2048 || val>sk->mtu)
3673                                 return -EINVAL;
3674                         sk->mss=val;
3675                         return 0;
3676                 case TCP_NODELAY:
3677                         sk->nonagle=(val==0)?0:1;
3678                         return 0;
3679                 default:
3680                         return(-ENOPROTOOPT);
3681         }
3682 }
3683 
3684 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3685 {
3686         int val,err;
3687 
3688         if(level!=SOL_TCP)
3689                 return ip_getsockopt(sk,level,optname,optval,optlen);
3690                         
3691         switch(optname)
3692         {
3693                 case TCP_MAXSEG:
3694                         val=sk->mss;
3695                         break;
3696                 case TCP_NODELAY:
3697                         val=sk->nonagle;        /* Until Johannes stuff is in */
3698                         break;
3699                 default:
3700                         return(-ENOPROTOOPT);
3701         }
3702         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3703         if(err)
3704                 return err;
3705         put_fs_long(sizeof(int),(unsigned long *) optlen);
3706 
3707         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3708         if(err)
3709                 return err;
3710         put_fs_long(val,(unsigned long *)optval);
3711 
3712         return(0);
3713 }       
3714 
3715 
3716 struct proto tcp_prot = {
3717   sock_wmalloc,
3718   sock_rmalloc,
3719   sock_wfree,
3720   sock_rfree,
3721   sock_rspace,
3722   sock_wspace,
3723   tcp_close,
3724   tcp_read,
3725   tcp_write,
3726   tcp_sendto,
3727   tcp_recvfrom,
3728   ip_build_header,
3729   tcp_connect,
3730   tcp_accept,
3731   ip_queue_xmit,
3732   tcp_retransmit,
3733   tcp_write_wakeup,
3734   tcp_read_wakeup,
3735   tcp_rcv,
3736   tcp_select,
3737   tcp_ioctl,
3738   NULL,
3739   tcp_shutdown,
3740   tcp_setsockopt,
3741   tcp_getsockopt,
3742   128,
3743   0,
3744   {NULL,},
3745   "TCP"
3746 };

/* [previous][next][first][last][top][bottom][index][help] */