root/net/inet/tcp.c


DEFINITIONS

This source file includes the following definitions.
  1. min
  2. print_th
  3. get_firstr
  4. diff
  5. tcp_select_window
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. tcp_send_skb
  15. tcp_dequeue_partial
  16. tcp_send_partial
  17. tcp_enqueue_partial
  18. tcp_send_ack
  19. tcp_build_header
  20. tcp_write
  21. tcp_sendto
  22. tcp_read_wakeup
  23. cleanup_rbuf
  24. tcp_read_urg
  25. tcp_read
  26. tcp_shutdown
  27. tcp_recvfrom
  28. tcp_reset
  29. tcp_options
  30. default_mask
  31. tcp_conn_request
  32. tcp_close
  33. tcp_write_xmit
  34. sort_send
  35. tcp_ack
  36. tcp_data
  37. tcp_urg
  38. tcp_fin
  39. tcp_accept
  40. tcp_connect
  41. tcp_sequence
  42. tcp_rcv
  43. tcp_write_wakeup
  44. tcp_send_probe0
  45. tcp_setsockopt
  46. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *
  16  * Fixes:       
  17  *              Alan Cox        :       Numerous verify_area() calls
  18  *              Alan Cox        :       Set the ACK bit on a reset
  19  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  20  *                                      and was trying to connect (tcp_err()).
  21  *              Alan Cox        :       All icmp error handling was broken
   22  *                                      pointers passed were wrong and the
  23  *                                      socket was looked up backwards. Nobody
  24  *                                      tested any icmp error code obviously.
  25  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  26  *                                      on errors. select behaves and the icmp error race
  27  *                                      has gone by moving it into sock.c
  28  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  29  *                                      packets for unknown sockets.
  30  *              Alan Cox        :       tcp option processing.
  31  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  32  *              Herp Rosmanith  :       More reset fixes
  33  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  34  *                                      any kind of RST is right out.
  35  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  36  *                                      otherwise odd bits of prattle escape still
  37  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  38  *                                      LAN workplace lockups.
  39  *              Alan Cox        :       Some tidyups using the new skb list facilities
  40  *              Alan Cox        :       sk->keepopen now seems to work
  41  *              Alan Cox        :       Pulls options out correctly on accepts
  42  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  43  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  44  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
   45  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  46  *              Alan Cox        :       Removed incorrect check for 20 * psh
  47  *      Michael O'Reilly        :       ack < copied bug fix.
  48  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  49  *              Alan Cox        :       FIN with no memory -> CRASH
  50  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  51  *              Alan Cox        :       Added TCP options (SOL_TCP)
  52  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  53  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  54  *              Alan Cox        :       Handle FIN (more) properly (we hope).
   55  *              Alan Cox        :       RST frames sent on unsynchronised state ack error.
  56  *              Alan Cox        :       Put in missing check for SYN bit.
  57  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  58  *                                      window non shrink trick.
  59  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  60  *              Charles Hedrick :       TCP fixes
  61  *              Toomas Tamm     :       TCP window fixes
  62  *
  63  *
  64  * To Fix:
  65  *                      Possibly a problem with accept(). BSD accept never fails after
  66  *              it causes a select. Linux can - given the official select semantics I
   67  *              feel that _really_ it's the BSD network programs that are bust (notably
  68  *              inetd, which hangs occasionally because of this).
  69  *                      Add VJ Fastrecovery algorithm ?
  70  *                      Protocol closedown badly messed up.
   71  *                      Incompatibility with spider ports (tcp hangs on that 
  72  *                      socket occasionally).
  73  *              MSG_PEEK and read on same socket at once can cause crashes.
  74  *
  75  *              This program is free software; you can redistribute it and/or
  76  *              modify it under the terms of the GNU General Public License
  77  *              as published by the Free Software Foundation; either version
  78  *              2 of the License, or(at your option) any later version.
  79  */
  80 #include <linux/types.h>
  81 #include <linux/sched.h>
  82 #include <linux/mm.h>
  83 #include <linux/string.h>
  84 #include <linux/socket.h>
  85 #include <linux/sockios.h>
  86 #include <linux/termios.h>
  87 #include <linux/in.h>
  88 #include <linux/fcntl.h>
  89 #include "inet.h"
  90 #include "dev.h"
  91 #include "ip.h"
  92 #include "protocol.h"
  93 #include "icmp.h"
  94 #include "tcp.h"
  95 #include "skbuff.h"
  96 #include "sock.h"
  97 #include "arp.h"
  98 #include <linux/errno.h>
  99 #include <linux/timer.h>
 100 #include <asm/system.h>
 101 #include <asm/segment.h>
 102 #include <linux/mm.h>
 103 
 104 #define SEQ_TICK 3
 105 unsigned long seq_offset;
 106 #define SUBNETSARELOCAL
 107 
 108 static __inline__ int 
 109 min(unsigned int a, unsigned int b)
 110 {
 111   if (a < b) return(a);
 112   return(b);
 113 }
 114 
 115 
 116 void
 117 print_th(struct tcphdr *th)
 118 {
 119   unsigned char *ptr;
 120 
 121   if (inet_debug != DBG_TCP) return;
 122 
 123   printk("TCP header:\n");
 124   ptr =(unsigned char *)(th + 1);
 125   printk("    source=%d, dest=%d, seq =%ld, ack_seq = %ld\n",
 126         ntohs(th->source), ntohs(th->dest),
 127         ntohl(th->seq), ntohl(th->ack_seq));
 128   printk("    fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n",
 129         th->fin, th->syn, th->rst, th->psh, th->ack,
 130         th->urg, th->res1, th->res2);
 131   printk("    window = %d, check = %d urg_ptr = %d\n",
 132         ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr));
 133   printk("    doff = %d\n", th->doff);
 134   printk("    options = %d %d %d %d\n", ptr[0], ptr[1], ptr[2], ptr[3]);
 135  }
 136 
 137 
 138 
 139 /* This routine grabs the first thing off of a rcv queue. */
 140 static struct sk_buff *
 141 get_firstr(struct sock *sk)
 142 {
 143   return skb_dequeue(&sk->rqueue);
 144 }
 145 
 146 /*
 147  *      Difference between two values in tcp ack terms.
 148  */
 149 
 150 static long
 151 diff(unsigned long seq1, unsigned long seq2)
 152 {
 153   long d;
 154 
 155   d = seq1 - seq2;
 156   if (d > 0) return(d);
 157 
 158   /* I hope this returns what I want. */
 159   return(~d+1);
 160 }
 161 
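  
/*
 * Editorial sketch, not part of the original file: TCP sequence space is
 * modulo 2^32, so ordering is decided by the sign of the 32-bit
 * difference, not by a plain unsigned compare.  The hypothetical helpers
 * below (names invented here) mirror what before()/after() in tcp.h are
 * assumed to do on this 32-bit target, where unsigned long is 32 bits.
 */
static __inline__ int example_seq_before(unsigned long seq1, unsigned long seq2)
{
  /* True when seq1 precedes seq2, even across a wrap of the space. */
  return (long)(seq1 - seq2) < 0;
}

static __inline__ int example_seq_after(unsigned long seq1, unsigned long seq2)
{
  return (long)(seq1 - seq2) > 0;
}
/*
 * Example: example_seq_before(0xfffffff0UL, 0x10UL) is true, although
 * 0xfffffff0 is larger than 0x10 as a plain unsigned number.
 */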
  162 /* This routine picks a TCP window for a socket based on
 163    the following constraints
 164    
 165    1. The window can never be shrunk once it is offered (RFC 793)
 166    2. We limit memory per socket
 167    
 168    For now we use NET2E3's heuristic of offering half the memory
 169    we have handy. All is not as bad as this seems however because
 170    of two things. Firstly we will bin packets even within the window
 171    in order to get the data we are waiting for into the memory limit.
 172    Secondly we bin common duplicate forms at receive time
 173 
 174    Better heuristics welcome
 175 */
 176    
 177 static int tcp_select_window(struct sock *sk)
 178 {
 179         int new_window = sk->prot->rspace(sk);
 180 
 181 /*
 182  * two things are going on here.  First, we don't ever offer a
 183  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 184  * receiver side of SWS as specified in RFC1122.
 185  * Second, we always give them at least the window they
 186  * had before, in order to avoid retracting window.  This
 187  * is technically allowed, but RFC1122 advises against it and
 188  * in practice it causes trouble.
 189  */
 190         if (new_window < min(sk->mss, MAX_WINDOW/2) ||
 191             new_window < sk->window)
 192           return(sk->window);
 193         return(new_window);
 194 }
 195 
 196 /* Enter the time wait state. */
 197 
 198 static void tcp_time_wait(struct sock *sk)
 199 {
 200   sk->state = TCP_TIME_WAIT;
 201   sk->shutdown = SHUTDOWN_MASK;
 202   if (!sk->dead)
 203         sk->state_change(sk);
 204   reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 205 }
 206 
 207 /*
  208  *      A timer event has triggered a tcp retransmit timeout. The
 209  *      socket xmit queue is ready and set up to send. Because
 210  *      the ack receive code keeps the queue straight we do
 211  *      nothing clever here.
 212  */
 213 
 214 static void
 215 tcp_retransmit(struct sock *sk, int all)
 216 {
 217   if (all) {
 218         ip_retransmit(sk, all);
 219         return;
 220   }
 221 
 222   sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 223   /* sk->ssthresh in theory can be zero.  I guess that's OK */
 224   sk->cong_count = 0;
 225 
 226   sk->cong_window = 1;
 227 
 228   /* Do the actual retransmit. */
 229   ip_retransmit(sk, all);
 230 }
 231 
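/*
 * Editorial sketch, not part of the original file: the congestion
 * response above is the classic multiplicative decrease.  Half the
 * congestion window is remembered as the slow-start threshold and the
 * window itself drops back to one segment, so slow start begins again.
 * The helper name below is invented for illustration only.
 */
static __inline__ void example_on_retransmit_timeout(unsigned long *cong_window,
                                                     unsigned long *ssthresh)
{
  *ssthresh = *cong_window >> 1;        /* may legitimately end up 0 */
  *cong_window = 1;                     /* restart slow start */
}
/* e.g. a window of 8 segments becomes ssthresh = 4, cong_window = 1. */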
 232 
 233 /*
 234  * This routine is called by the ICMP module when it gets some
 235  * sort of error condition.  If err < 0 then the socket should
 236  * be closed and the error returned to the user.  If err > 0
 237  * it's just the icmp type << 8 | icmp code.  After adjustment
 238  * header points to the first 8 bytes of the tcp header.  We need
 239  * to find the appropriate port.
 240  */
 241 void
 242 tcp_err(int err, unsigned char *header, unsigned long daddr,
 243         unsigned long saddr, struct inet_protocol *protocol)
 244 {
 245   struct tcphdr *th;
 246   struct sock *sk;
 247   struct iphdr *iph=(struct iphdr *)header;
 248   
 249   header+=4*iph->ihl;
 250    
 251   DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n",
 252                                         err, header, daddr, saddr, protocol));
 253 
 254   th =(struct tcphdr *)header;
 255   sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 256   print_th(th);
 257 
 258   if (sk == NULL) return;
 259   
 260   if(err<0)
 261   {
 262         sk->err = -err;
 263         sk->error_report(sk);
 264         return;
 265   }
 266 
 267   if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
 268         /*
 269          * FIXME:
 270          * For now we will just trigger a linear backoff.
 271          * The slow start code should cause a real backoff here.
 272          */
 273         if (sk->cong_window > 4) sk->cong_window--;
 274         return;
 275   }
 276 
 277   DPRINTF((DBG_TCP, "TCP: icmp_err got error\n"));
 278   sk->err = icmp_err_convert[err & 0xff].errno;
 279 
 280   /*
 281    * If we've already connected we will keep trying
 282    * until we time out, or the user gives up.
 283    */
 284   if (icmp_err_convert[err & 0xff].fatal) {
 285         if (sk->state == TCP_SYN_SENT) {
 286                 sk->state = TCP_CLOSE;
 287                 sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 288         }
 289   }
 290   return;
 291 }
 292 
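/*
 * Editorial sketch, not part of the original file: as the comment above
 * says, a positive err packs the ICMP type in the high byte and the ICMP
 * code in the low byte.  The hypothetical helper below just pulls the
 * two fields apart.
 */
static __inline__ void example_icmp_split(int err, int *type, int *code)
{
  *type = (err >> 8) & 0xff;
  *code = err & 0xff;
}
/* e.g. ICMP_SOURCE_QUENCH (type 4, code 0) arrives as err == (4 << 8). */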
 293 
 294 /*
 295  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
  296  *      in the received data queue (i.e. a missing frame that still needs to be sent to us)
 297  */
 298 
 299 static int
 300 tcp_readable(struct sock *sk)
 301 {
 302   unsigned long counted;
 303   unsigned long amount;
 304   struct sk_buff *skb;
 305   int count=0;
 306   int sum;
 307   unsigned long flags;
 308 
 309   DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk));
 310   if(sk && sk->debug)
 311         printk("tcp_readable: %p - ",sk);
 312 
 313   if (sk == NULL || skb_peek(&sk->rqueue) == NULL)      /* Empty sockets are easy! */
 314   {
 315         if(sk && sk->debug) 
 316                 printk("empty\n");
 317         return(0);
 318   }
 319   
 320   counted = sk->copied_seq+1;   /* Where we are at the moment */
 321   amount = 0;
 322   
 323   save_flags(flags);            /* So nobody adds things at the wrong moment */
 324   cli();
 325   skb =(struct sk_buff *)sk->rqueue;
 326 
 327   /* Do until a push or until we are out of data. */
 328   do {
 329         count++;
 330 #ifdef OLD      
 331         /* This is wrong: It breaks Chameleon amongst other stacks */
 332         if (count > 20) {
 333                 restore_flags(flags);
 334                 DPRINTF((DBG_TCP, "tcp_readable, more than 20 packets without a psh\n"));
 335                 printk("tcp_read: possible read_queue corruption.\n");
 336                 return(amount);
 337         }
 338 #endif  
  339         if (before(counted, skb->h.th->seq))    /* Found a hole so stop here */
 340                 break;
 341         sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 342         if (skb->h.th->syn) sum++;
 343         if (skb->h.th->urg) {
  344                 sum -= ntohs(skb->h.th->urg_ptr);       /* Don't count urg data */
 345         }
 346         if (sum >= 0) {                                 /* Add it up, move on */
 347                 amount += sum;
 348                 if (skb->h.th->syn) amount--;
 349                 counted += sum;
 350         }
 351         if (amount && skb->h.th->psh) break;
 352         skb =(struct sk_buff *)skb->next;               /* Move along */
 353   } while(skb != sk->rqueue);
 354   restore_flags(flags);
 355   DPRINTF((DBG_TCP, "tcp readable returning %d bytes\n", amount));
 356   if(sk->debug)
 357         printk("got %lu bytes.\n",amount);
 358   return(amount);
 359 }
 360 
 361 
 362 /*
 363  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 364  *      listening socket has a receive queue of sockets to accept.
 365  */
 366 
 367 static int
 368 tcp_select(struct sock *sk, int sel_type, select_table *wait)
 369 {
 370   DPRINTF((DBG_TCP, "tcp_select(sk=%X, sel_type = %d, wait = %X)\n",
 371                                                 sk, sel_type, wait));
 372 
 373   sk->inuse = 1;
 374   switch(sel_type) {
 375         case SEL_IN:
 376                 if(sk->debug)
 377                         printk("select in");
 378                 select_wait(sk->sleep, wait);
 379                 if(sk->debug)
 380                         printk("-select out");
 381                 if (skb_peek(&sk->rqueue) != NULL) {
 382                         if (sk->state == TCP_LISTEN || tcp_readable(sk)) {
 383                                 release_sock(sk);
 384                                 if(sk->debug)
 385                                         printk("-select ok data\n");
 386                                 return(1);
 387                         }
 388                 }
 389                 if (sk->err != 0)       /* Receiver error */
 390                 {
 391                         release_sock(sk);
 392                         if(sk->debug)
 393                                 printk("-select ok error");
 394                         return(1);
 395                 }
 396                 if (sk->shutdown & RCV_SHUTDOWN) {
 397                         release_sock(sk);
 398                         if(sk->debug)
 399                                 printk("-select ok down\n");
 400                         return(1);
 401                 } else {
 402                         release_sock(sk);
 403                         if(sk->debug)
 404                                 printk("-select fail\n");
 405                         return(0);
 406                 }
 407         case SEL_OUT:
 408                 select_wait(sk->sleep, wait);
 409                 if (sk->shutdown & SEND_SHUTDOWN) {
 410                         DPRINTF((DBG_TCP,
 411                                 "write select on shutdown socket.\n"));
 412 
 413                         /* FIXME: should this return an error? */
 414                         release_sock(sk);
 415                         return(0);
 416                 }
 417 
 418                 /*
 419                  * FIXME:
 420                  * Hack so it will probably be able to write
 421                  * something if it says it's ok to write.
 422                  */
 423                 if (sk->prot->wspace(sk) >= sk->mss) {
 424                         release_sock(sk);
 425                         /* This should cause connect to work ok. */
 426                         if (sk->state == TCP_SYN_RECV ||
 427                             sk->state == TCP_SYN_SENT) return(0);
 428                         return(1);
 429                 }
 430                 DPRINTF((DBG_TCP,
 431                         "tcp_select: sleeping on write sk->wmem_alloc = %d, "
 432                         "sk->packets_out = %d\n"
 433                         "sk->wback = %X, sk->wfront = %X\n"
 434                         "sk->send_seq = %u, sk->window_seq=%u\n", 
 435                                 sk->wmem_alloc, sk->packets_out,
 436                                 sk->wback, sk->wfront,
 437                                 sk->send_seq, sk->window_seq));
 438 
 439                 release_sock(sk);
 440                 return(0);
 441         case SEL_EX:
 442                 select_wait(sk->sleep,wait);
 443                 if (sk->err) {
 444                         release_sock(sk);
 445                         return(1);
 446                 }
 447                 release_sock(sk);
 448                 return(0);
 449   }
 450 
 451   release_sock(sk);
 452   return(0);
 453 }
 454 
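/*
 * Editorial userspace sketch, not part of the original file: seen from an
 * application, the SEL_IN case above means a listening socket becomes
 * "readable" when a completed connection is queued, so select() followed
 * by accept() will not block.  Guarded out so it cannot affect the build;
 * error handling is trimmed.
 */
#if 0   /* example only */
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>

int wait_then_accept(int listen_fd)
{
  fd_set rfds;

  FD_ZERO(&rfds);
  FD_SET(listen_fd, &rfds);
  /* Blocks until the listening socket is readable, i.e. a connection
   * is waiting on its receive queue. */
  if (select(listen_fd + 1, &rfds, NULL, NULL, NULL) < 0)
        return -1;
  return accept(listen_fd, NULL, NULL);
}
#endif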
 455 
 456 int
 457 tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 458 {
 459   int err;
 460   DPRINTF((DBG_TCP, "tcp_ioctl(sk=%X, cmd = %d, arg=%X)\n", sk, cmd, arg));
 461   switch(cmd) {
 462         case DDIOCSDBG:
 463                 return(dbg_ioctl((void *) arg, DBG_TCP));
 464 
 465         case TIOCINQ:
 466 #ifdef FIXME    /* FIXME: */
 467         case FIONREAD:
 468 #endif
 469                 {
 470                         unsigned long amount;
 471 
 472                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 473 
 474                         sk->inuse = 1;
 475                         amount = tcp_readable(sk);
 476                         release_sock(sk);
 477                         DPRINTF((DBG_TCP, "returning %d\n", amount));
 478                         err=verify_area(VERIFY_WRITE,(void *)arg,
 479                                                    sizeof(unsigned long));
 480                         if(err)
 481                                 return err;
 482                         put_fs_long(amount,(unsigned long *)arg);
 483                         return(0);
 484                 }
 485         case SIOCATMARK:
 486                 {
 487                         struct sk_buff *skb;
 488                         int answ = 0;
 489 
 490                         /*
 491                          * Try to figure out if we need to read
 492                          * some urgent data.
 493                          */
 494                         sk->inuse = 1;
 495                         if ((skb=skb_peek(&sk->rqueue)) != NULL) 
 496                         {
 497                                 if (sk->copied_seq+1 == skb->h.th->seq && skb->h.th->urg) 
 498                                                 answ = 1;
 499                         }
 500                         release_sock(sk);
 501                         err=verify_area(VERIFY_WRITE,(void *) arg,
 502                                                   sizeof(unsigned long));
 503                         if(err)
 504                                 return err;
 505                         put_fs_long(answ,(int *) arg);
 506                         return(0);
 507                 }
 508         case TIOCOUTQ:
 509                 {
 510                         unsigned long amount;
 511 
 512                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 513                         amount = sk->prot->wspace(sk);
 514                         err=verify_area(VERIFY_WRITE,(void *)arg,
 515                                                    sizeof(unsigned long));
 516                         if(err)
 517                                 return err;
 518                         put_fs_long(amount,(unsigned long *)arg);
 519                         return(0);
 520                 }
 521         default:
 522                 return(-EINVAL);
 523   }
 524 }
 525 
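/*
 * Editorial userspace sketch, not part of the original file: typical use
 * of the two socket ioctls handled above.  TIOCINQ reports the byte count
 * from tcp_readable(), SIOCATMARK reports whether urgent data is next.
 * Header locations for these constants vary between libc versions, so the
 * include here is an assumption; the block is guarded out of the build.
 */
#if 0   /* example only */
#include <sys/ioctl.h>  /* TIOCINQ; SIOCATMARK may need <linux/sockios.h> */

void example_query_socket(int fd)
{
  unsigned long pending = 0;    /* long, to match put_fs_long() above */
  unsigned long at_mark = 0;

  ioctl(fd, TIOCINQ, &pending);         /* bytes waiting to be read */
  ioctl(fd, SIOCATMARK, &at_mark);      /* 1 if the urgent mark is next */
}
#endif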
 526 
 527 /* This routine computes a TCP checksum. */
 528 unsigned short
 529 tcp_check(struct tcphdr *th, int len,
 530           unsigned long saddr, unsigned long daddr)
 531 {     
 532   unsigned long sum;
 533    
 534   if (saddr == 0) saddr = my_addr();
 535   print_th(th);
 536   __asm__("\t addl %%ecx,%%ebx\n"
 537           "\t adcl %%edx,%%ebx\n"
 538           "\t adcl $0, %%ebx\n"
 539           : "=b"(sum)
 540           : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 541           : "cx","bx","dx" );
 542    
 543   if (len > 3) {
 544         __asm__("\tclc\n"
 545                 "1:\n"
 546                 "\t lodsl\n"
 547                 "\t adcl %%eax, %%ebx\n"
 548                 "\t loop 1b\n"
 549                 "\t adcl $0, %%ebx\n"
 550                 : "=b"(sum) , "=S"(th)
 551                 : "0"(sum), "c"(len/4) ,"1"(th)
 552                 : "ax", "cx", "bx", "si" );
 553   }
 554    
 555   /* Convert from 32 bits to 16 bits. */
 556   __asm__("\t movl %%ebx, %%ecx\n"
 557           "\t shrl $16,%%ecx\n"
 558           "\t addw %%cx, %%bx\n"
 559           "\t adcw $0, %%bx\n"
 560           : "=b"(sum)
 561           : "0"(sum)
 562           : "bx", "cx");
 563    
 564   /* Check for an extra word. */
 565   if ((len & 2) != 0) {
 566         __asm__("\t lodsw\n"
 567                 "\t addw %%ax,%%bx\n"
 568                 "\t adcw $0, %%bx\n"
 569                 : "=b"(sum), "=S"(th)
 570                 : "0"(sum) ,"1"(th)
 571                 : "si", "ax", "bx");
 572   }
 573    
 574   /* Now check for the extra byte. */
 575   if ((len & 1) != 0) {
 576         __asm__("\t lodsb\n"
 577                 "\t movb $0,%%ah\n"
 578                 "\t addw %%ax,%%bx\n"
 579                 "\t adcw $0, %%bx\n"
 580                 : "=b"(sum)
 581                 : "0"(sum) ,"S"(th)
 582                 : "si", "ax", "bx");
 583   }
 584    
 585   /* We only want the bottom 16 bits, but we never cleared the top 16. */
 586   return((~sum) & 0xffff);
 587 }
 588 
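/*
 * Editorial sketch, not part of the original file: the i386 assembler
 * above computes the usual ones'-complement checksum over the TCP pseudo
 * header (source, destination, protocol, TCP length) followed by the TCP
 * header and data.  A portable C rendering of the same arithmetic is
 * shown below, assuming the little-endian 32-bit layout this file targets
 * and that saddr/daddr arrive in network byte order; the saddr == 0
 * special case of tcp_check() is omitted.
 */
static __inline__ unsigned short
example_tcp_check(struct tcphdr *th, int len,
                  unsigned long saddr, unsigned long daddr)
{
  unsigned long sum;
  unsigned short *p = (unsigned short *) th;

  /* Pseudo header, folded in as 16-bit words. */
  sum  = (saddr & 0xffff) + (saddr >> 16);
  sum += (daddr & 0xffff) + (daddr >> 16);
  sum += htons(IPPROTO_TCP) + htons(len);

  /* TCP header plus data, 16 bits at a time. */
  while (len > 1) {
        sum += *p++;
        len -= 2;
  }
  if (len)                      /* trailing odd byte, high byte taken as 0 */
        sum += *(unsigned char *) p;

  /* Fold the carries back into the low 16 bits and complement. */
  while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
  return (unsigned short) (~sum & 0xffff);
}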
 589 
 590 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
 591                 unsigned long daddr, int len, struct sock *sk)
 592 {
 593         th->check = 0;
 594         th->check = tcp_check(th, len, saddr, daddr);
 595         return;
 596 }
 597 
 598 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
 599 {
 600         int size;
 601 
 602         /* length of packet (not counting length of pre-tcp headers) */
 603         size = skb->len - ((unsigned char *) skb->h.th - skb->data);
 604 
 605         /* sanity check it.. */
 606         if (size < sizeof(struct tcphdr) || size > skb->len) {
 607                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 608                         skb, skb->data, skb->h.th, skb->len);
 609                 kfree_skb(skb, FREE_WRITE);
 610                 return;
 611         }
 612 
 613         /* If we have queued a header size packet.. */
 614         if (size == sizeof(struct tcphdr)) {
  615                 /* If it's got a SYN or FIN it's notionally included in the size.. */
 616                 if(!skb->h.th->syn && !skb->h.th->fin) {
 617                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 618                         kfree_skb(skb,FREE_WRITE);
 619                         return;
 620                 }
 621         }
 622   
 623         /* We need to complete and send the packet. */
 624         tcp_send_check(skb->h.th, sk->saddr, sk->daddr, size, sk);
 625 
 626         skb->h.seq = sk->send_seq;
 627         if (after(sk->send_seq , sk->window_seq) ||
 628             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 629              sk->packets_out >= sk->cong_window) {
 630                 DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
 631                                         sk->cong_window, sk->packets_out));
 632                 DPRINTF((DBG_TCP, "sk->send_seq = %d, sk->window_seq = %d\n",
 633                                         sk->send_seq, sk->window_seq));
 634                 skb->next = NULL;
 635                 skb->magic = TCP_WRITE_QUEUE_MAGIC;
 636                 if (sk->wback == NULL) {
 637                         sk->wfront = skb;
 638                 } else {
 639                         sk->wback->next = skb;
 640                 }
 641                 sk->wback = skb;
 642                 if (before(sk->window_seq, sk->wfront->h.seq) &&
 643                     sk->send_head == NULL &&
 644                     sk->ack_backlog == 0)
 645                   reset_timer(sk, TIME_PROBE0, sk->rto);
 646         } else {
 647                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 648         }
 649 }
 650 
 651 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
 652 {
 653         struct sk_buff * skb;
 654         unsigned long flags;
 655 
 656         save_flags(flags);
 657         cli();
 658         skb = sk->partial;
 659         if (skb) {
 660                 sk->partial = NULL;
 661                 del_timer(&sk->partial_timer);
 662         }
 663         restore_flags(flags);
 664         return skb;
 665 }
 666 
 667 static void tcp_send_partial(struct sock *sk)
 668 {
 669         struct sk_buff *skb;
 670 
 671         if (sk == NULL)
 672                 return;
 673         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 674                 tcp_send_skb(sk, skb);
 675 }
 676 
 677 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
 678 {
 679         struct sk_buff * tmp;
 680         unsigned long flags;
 681 
 682         save_flags(flags);
 683         cli();
 684         tmp = sk->partial;
 685         if (tmp)
 686                 del_timer(&sk->partial_timer);
 687         sk->partial = skb;
 688         sk->partial_timer.expires = HZ;
 689         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 690         sk->partial_timer.data = (unsigned long) sk;
 691         add_timer(&sk->partial_timer);
 692         restore_flags(flags);
 693         if (tmp)
 694                 tcp_send_skb(sk, tmp);
 695 }
 696 
 697 
 698 /* This routine sends an ack and also updates the window. */
 699 static void
 700 tcp_send_ack(unsigned long sequence, unsigned long ack,
 701              struct sock *sk,
 702              struct tcphdr *th, unsigned long daddr)
 703 {
 704   struct sk_buff *buff;
 705   struct tcphdr *t1;
 706   struct device *dev = NULL;
 707   int tmp;
 708 
 709   if(sk->zapped)
 710         return;         /* We have been reset, we may not send again */
 711   /*
 712    * We need to grab some memory, and put together an ack,
 713    * and then put it into the queue to be sent.
 714    */
 715   buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 716   if (buff == NULL) {
 717         /* Force it to send an ack. */
 718         sk->ack_backlog++;
 719         if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) {
 720                 reset_timer(sk, TIME_WRITE, 10);
 721         }
  722         if (inet_debug == DBG_SLIP) printk("\rtcp_ack: malloc failed\n");
 723         return;
 724   }
 725 
 726   buff->mem_addr = buff;
 727   buff->mem_len = MAX_ACK_SIZE;
 728   buff->len = sizeof(struct tcphdr);
 729   buff->sk = sk;
 730   t1 =(struct tcphdr *) buff->data;
 731 
 732   /* Put in the IP header and routing stuff. */
 733   tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 734                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 735   if (tmp < 0) {
 736         buff->free=1;
 737         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
  738         if (inet_debug == DBG_SLIP) printk("\rtcp_ack: build_header failed\n");
 739         return;
 740   }
 741   buff->len += tmp;
 742   t1 =(struct tcphdr *)((char *)t1 +tmp);
 743 
 744   /* FIXME: */
 745   memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 746 
 747   /* swap the send and the receive. */
 748   t1->dest = th->source;
 749   t1->source = th->dest;
 750   t1->seq = ntohl(sequence);
 751   t1->ack = 1;
 752   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
 753   t1->window = ntohs(sk->window);
 754   t1->res1 = 0;
 755   t1->res2 = 0;
 756   t1->rst = 0;
 757   t1->urg = 0;
 758   t1->syn = 0;
 759   t1->psh = 0;
 760   t1->fin = 0;
 761   if (ack == sk->acked_seq) {
 762         sk->ack_backlog = 0;
 763         sk->bytes_rcv = 0;
 764         sk->ack_timed = 0;
 765         if (sk->send_head == NULL && sk->wfront == NULL && sk->timeout == TIME_WRITE) 
 766         {
 767                 if(sk->keepopen)
 768                         reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 769                 else
 770                         delete_timer(sk);
 771         }
 772   }
 773   t1->ack_seq = ntohl(ack);
 774   t1->doff = sizeof(*t1)/4;
 775   tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 776   if (sk->debug)
 777          printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 778   sk->prot->queue_xmit(sk, dev, buff, 1);
 779 }
 780 
 781 
 782 /* This routine builds a generic TCP header. */
 783 static int
 784 tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
 785 {
 786 
 787   /* FIXME: want to get rid of this. */
 788   memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 789   th->seq = htonl(sk->send_seq);
 790   th->psh =(push == 0) ? 1 : 0;
 791   th->doff = sizeof(*th)/4;
 792   th->ack = 1;
 793   th->fin = 0;
 794   sk->ack_backlog = 0;
 795   sk->bytes_rcv = 0;
 796   sk->ack_timed = 0;
 797   th->ack_seq = htonl(sk->acked_seq);
 798   sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 799   th->window = htons(sk->window);
 800 
 801   return(sizeof(*th));
 802 }
 803 
 804 /*
 805  * This routine copies from a user buffer into a socket,
 806  * and starts the transmit system.
 807  */
 808 static int
 809 tcp_write(struct sock *sk, unsigned char *from,
 810           int len, int nonblock, unsigned flags)
 811 {
 812   int copied = 0;
 813   int copy;
 814   int tmp;
 815   struct sk_buff *skb;
 816   struct sk_buff *send_tmp;
 817   unsigned char *buff;
 818   struct proto *prot;
 819   struct device *dev = NULL;
 820 
 821   DPRINTF((DBG_TCP, "tcp_write(sk=%X, from=%X, len=%d, nonblock=%d, flags=%X)\n",
 822                                         sk, from, len, nonblock, flags));
 823 
 824   sk->inuse=1;
 825   prot = sk->prot;
 826   while(len > 0) {
 827         if (sk->err) {                  /* Stop on an error */
 828                 release_sock(sk);
 829                 if (copied) return(copied);
 830                 tmp = -sk->err;
 831                 sk->err = 0;
 832                 return(tmp);
 833         }
 834 
 835         /* First thing we do is make sure that we are established. */    
 836         if (sk->shutdown & SEND_SHUTDOWN) {
 837                 release_sock(sk);
 838                 sk->err = EPIPE;
 839                 if (copied) return(copied);
 840                 sk->err = 0;
 841                 return(-EPIPE);
 842         }
 843 
 844 
 845         /* Wait for a connection to finish. */
 846         
 847         while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) {
 848                 if (sk->err) {
 849                         release_sock(sk);
 850                         if (copied) return(copied);
 851                         tmp = -sk->err;
 852                         sk->err = 0;
 853                         return(tmp);
 854                 }
 855 
 856                 if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) {
 857                         release_sock(sk);
 858                         DPRINTF((DBG_TCP, "tcp_write: return 1\n"));
 859                         if (copied) return(copied);
 860 
 861                         if (sk->err) {
 862                                 tmp = -sk->err;
 863                                 sk->err = 0;
 864                                 return(tmp);
 865                         }
 866 
 867                         if (sk->keepopen) {
 868                                 send_sig(SIGPIPE, current, 0);
 869                         }
 870                         return(-EPIPE);
 871                 }
 872 
 873                 if (nonblock || copied) {
 874                         release_sock(sk);
 875                         DPRINTF((DBG_TCP, "tcp_write: return 2\n"));
 876                         if (copied) return(copied);
 877                         return(-EAGAIN);
 878                 }
 879 
 880                 release_sock(sk);
 881                 cli();
 882                 if (sk->state != TCP_ESTABLISHED &&
 883                     sk->state != TCP_CLOSE_WAIT && sk->err == 0) {
 884                         interruptible_sleep_on(sk->sleep);
 885                         if (current->signal & ~current->blocked) {
 886                                 sti();
 887                                 DPRINTF((DBG_TCP, "tcp_write: return 3\n"));
 888                                 if (copied) return(copied);
 889                                 return(-ERESTARTSYS);
 890                         }
 891                 }
 892                 sk->inuse = 1;
 893                 sti();
 894         }
 895 
 896 /*
  897  * The following code can result in copy <= 0 if sk->mss is ever
 898  * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 899  * sk->mtu is constant once SYN processing is finished.  I.e. we
 900  * had better not get here until we've seen his SYN and at least one
 901  * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 902  * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 903  * non-decreasing.  Note that any ioctl to set user_mss must be done
 904  * before the exchange of SYN's.  If the initial ack from the other
 905  * end has a window of 0, max_window and thus mss will both be 0.
 906  */
 907 
 908         /* Now we need to check if we have a half built packet. */
 909         if ((skb = tcp_dequeue_partial(sk)) != NULL) {
 910                 int hdrlen;
 911 
 912                  /* IP header + TCP header */
 913                 hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 914                          + sizeof(struct tcphdr);
 915 
 916                 /* Add more stuff to the end of skb->len */
 917                 if (!(flags & MSG_OOB)) {
 918                         copy = min(sk->mss - (skb->len - hdrlen), len);
 919                         /* FIXME: this is really a bug. */
 920                         if (copy <= 0) {
 921                           printk("TCP: **bug**: \"copy\" <= 0!!\n");
 922                           copy = 0;
 923                         }
 924           
 925                         memcpy_fromfs(skb->data + skb->len, from, copy);
 926                         skb->len += copy;
 927                         from += copy;
 928                         copied += copy;
 929                         len -= copy;
 930                         sk->send_seq += copy;
 931                       }
 932                 if ((skb->len - hdrlen) >= sk->mss ||
 933                     (flags & MSG_OOB) ||
 934                     !sk->packets_out)
 935                         tcp_send_skb(sk, skb);
 936                 else
 937                         tcp_enqueue_partial(skb, sk);
 938                 continue;
 939         }
 940 
 941         /*
 942          * We also need to worry about the window.
 943          * If window < 1/2 the maximum window we've seen from this
 944          *   host, don't use it.  This is sender side
 945          *   silly window prevention, as specified in RFC1122.
  946          *   (Note that this is different than earlier versions of
 947          *   SWS prevention, e.g. RFC813.).  What we actually do is 
 948          *   use the whole MSS.  Since the results in the right
  949          *   use the whole MSS.  Since this results in the right
 950          *   be queued for later rather than sent.
 951          */
 952 
 953         copy = diff(sk->window_seq, sk->send_seq);
 954         /* what if max_window == 1?  In that case max_window >> 1 is 0.
 955          * however in that case copy == max_window, so it's OK to use 
 956          * the window */
 957         if (copy < (sk->max_window >> 1))
 958           copy = sk->mss;
 959         copy = min(copy, sk->mss);
 960         copy = min(copy, len);
 961 
 962   /* We should really check the window here also. */
 963         send_tmp = NULL;
 964         if (copy < sk->mss && !(flags & MSG_OOB)) {
  965         /* We will release the socket in case we sleep here. */
 966           release_sock(sk);
 967           /* NB: following must be mtu, because mss can be increased.
 968            * mss is always <= mtu */
 969           skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
 970           sk->inuse = 1;
 971           send_tmp = skb;
 972         } else {
  973                 /* We will release the socket in case we sleep here. */
 974           release_sock(sk);
 975           skb = prot->wmalloc(sk, copy + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
 976           sk->inuse = 1;
 977         }
 978 
 979         /* If we didn't get any memory, we need to sleep. */
 980         if (skb == NULL) {
 981                 if (nonblock /* || copied */) {
 982                         release_sock(sk);
 983                         DPRINTF((DBG_TCP, "tcp_write: return 4\n"));
 984                         if (copied) return(copied);
 985                         return(-EAGAIN);
 986                 }
 987 
 988                 /* FIXME: here is another race condition. */
 989                 tmp = sk->wmem_alloc;
 990                 release_sock(sk);
 991                 cli();
 992                 /* Again we will try to avoid it. */
 993                 if (tmp <= sk->wmem_alloc &&
 994                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
 995                                 && sk->err == 0) {
 996                         interruptible_sleep_on(sk->sleep);
 997                         if (current->signal & ~current->blocked) {
 998                                 sti();
 999                                 DPRINTF((DBG_TCP, "tcp_write: return 5\n"));
1000                                 if (copied) return(copied);
1001                                 return(-ERESTARTSYS);
1002                         }
1003                 }
1004                 sk->inuse = 1;
1005                 sti();
1006                 continue;
1007         }
1008 
1009         skb->len = 0;
1010         skb->sk = sk;
1011         skb->free = 0;
1012 
1013         buff = skb->data;
1014 
1015         /*
1016          * FIXME: we need to optimize this.
1017          * Perhaps some hints here would be good.
1018          */
1019         tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1020                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1021         if (tmp < 0 ) {
1022                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
1023                 release_sock(sk);
1024                 DPRINTF((DBG_TCP, "tcp_write: return 6\n"));
1025                 if (copied) return(copied);
1026                 return(tmp);
1027         }
1028         skb->len += tmp;
1029         skb->dev = dev;
1030         buff += tmp;
1031         skb->h.th =(struct tcphdr *) buff;
1032         tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1033         if (tmp < 0) {
1034                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
1035                 release_sock(sk);
1036                 DPRINTF((DBG_TCP, "tcp_write: return 7\n"));
1037                 if (copied) return(copied);
1038                 return(tmp);
1039         }
1040 
1041         if (flags & MSG_OOB) {
1042                 ((struct tcphdr *)buff)->urg = 1;
1043                 ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1044         }
1045         skb->len += tmp;
1046         memcpy_fromfs(buff+tmp, from, copy);
1047 
1048         from += copy;
1049         copied += copy;
1050         len -= copy;
1051         skb->len += copy;
1052         skb->free = 0;
1053         sk->send_seq += copy;
1054 
1055         if (send_tmp != NULL && sk->packets_out) {
1056                 tcp_enqueue_partial(send_tmp, sk);
1057                 continue;
1058         }
1059         tcp_send_skb(sk, skb);
1060   }
1061   sk->err = 0;
1062 
1063 /*
 1064  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1065  *      interactive fast network servers. It's meant to be on and
1066  *      it really improves the throughput though not the echo time
1067  *      on my slow slip link - Alan
1068  */
1069 
1070   /* Avoid possible race on send_tmp - c/o Johannes Stille */
1071   if(sk->partial && 
1072      ((!sk->packets_out) 
1073      /* If not nagling we can send on the before case too.. */
1074       || (sk->nonagle && before(sk->send_seq , sk->window_seq))
1075       ))
1076         tcp_send_partial(sk);
1077   /* -- */
1078   release_sock(sk);
1079   DPRINTF((DBG_TCP, "tcp_write: return 8\n"));
1080   return(copied);
1081 }
1082 
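/*
 * Editorial sketch, not part of the original file: the send-or-queue
 * decision spread through tcp_write() above amounts to Nagle's rule.  A
 * full segment, urgent data or an idle connection is sent at once; a
 * small segment with data still unacknowledged is held as a partial
 * packet, unless TCP_NODELAY is set.  The hypothetical helper below
 * restates that policy (the window check is left out for brevity).
 */
static __inline__ int example_send_now(int payload, int mss, int oob,
                                       int packets_out, int nonagle)
{
  if (payload >= mss)           /* a full-sized segment */
        return 1;
  if (oob)                      /* urgent data is never delayed */
        return 1;
  if (!packets_out)             /* nothing in flight, no ack to piggyback on */
        return 1;
  return nonagle;               /* TCP_NODELAY pushes small segments out too */
}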
1083 
1084 static int
1085 tcp_sendto(struct sock *sk, unsigned char *from,
1086            int len, int nonblock, unsigned flags,
1087            struct sockaddr_in *addr, int addr_len)
1088 {
1089   struct sockaddr_in sin;
1090 
1091   if (addr_len < sizeof(sin)) return(-EINVAL);
1092   memcpy_fromfs(&sin, addr, sizeof(sin));
1093   if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL);
1094   if (sin.sin_port != sk->dummy_th.dest) return(-EINVAL);
1095   if (sin.sin_addr.s_addr != sk->daddr) return(-EINVAL);
1096   return(tcp_write(sk, from, len, nonblock, flags));
1097 }
1098 
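/*
 * Editorial userspace sketch, not part of the original file: as the
 * checks above show, sendto() on a TCP socket is only a thin wrapper
 * around tcp_write() -- any address supplied has to match the peer the
 * socket is already connected to, otherwise -EINVAL comes back.  Guarded
 * out of the build.
 */
#if 0   /* example only */
#include <sys/socket.h>
#include <netinet/in.h>

int example_send_on_connected(int fd, const void *buf, int len,
                              const struct sockaddr_in *peer)
{
  /* Equivalent to write(fd, buf, len) provided *peer is the address
   * previously handed to connect(). */
  return sendto(fd, buf, len, 0,
                (const struct sockaddr *) peer, sizeof(*peer));
}
#endif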
1099 
1100 static void
1101 tcp_read_wakeup(struct sock *sk)
1102 {
1103   int tmp;
1104   struct device *dev = NULL;
1105   struct tcphdr *t1;
1106   struct sk_buff *buff;
1107 
1108   DPRINTF((DBG_TCP, "in tcp read wakeup\n"));
1109   if (!sk->ack_backlog) return;
1110 
1111   /*
1112    * FIXME: we need to put code here to prevent this routine from
1113    * being called.  Being called once in a while is ok, so only check
1114    * if this is the second time in a row.
1115    */
1116 
1117   /*
1118    * We need to grab some memory, and put together an ack,
1119    * and then put it into the queue to be sent.
1120    */
1121   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1122   if (buff == NULL) {
1123         /* Try again real soon. */
1124         reset_timer(sk, TIME_WRITE, 10);
1125         return;
1126   }
1127 
1128   buff->mem_addr = buff;
1129   buff->mem_len = MAX_ACK_SIZE;
1130   buff->len = sizeof(struct tcphdr);
1131   buff->sk = sk;
1132 
1133   /* Put in the IP header and routing stuff. */
1134   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1135                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1136   if (tmp < 0) {
1137         buff->free=1;
1138         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1139         return;
1140   }
1141 
1142   buff->len += tmp;
1143   t1 =(struct tcphdr *)(buff->data +tmp);
1144 
1145   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1146   t1->seq = ntohl(sk->send_seq);
1147   t1->ack = 1;
1148   t1->res1 = 0;
1149   t1->res2 = 0;
1150   t1->rst = 0;
1151   t1->urg = 0;
1152   t1->syn = 0;
1153   t1->psh = 0;
1154   sk->ack_backlog = 0;
1155   sk->bytes_rcv = 0;
1156   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1157   t1->window = ntohs(sk->window);
1158   t1->ack_seq = ntohl(sk->acked_seq);
1159   t1->doff = sizeof(*t1)/4;
1160   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1161   sk->prot->queue_xmit(sk, dev, buff, 1);
1162 }
1163 
1164 
1165 /*
1166  * FIXME:
1167  * This routine frees used buffers.
1168  * It should consider sending an ACK to let the
1169  * other end know we now have a bigger window.
1170  */
1171 static void
1172 cleanup_rbuf(struct sock *sk)
1173 {
1174   unsigned long flags;
1175   int left;
1176   struct sk_buff *skb;
1177 
1178   if(sk->debug)
1179         printk("cleaning rbuf for sk=%p\n", sk);
1180   
1181   save_flags(flags);
1182   cli();
1183   
1184   left = sk->prot->rspace(sk);
1185  
1186   /*
1187    * We have to loop through all the buffer headers,
1188    * and try to free up all the space we can.
1189    */
1190   while((skb=skb_peek(&sk->rqueue)) != NULL ) 
1191   {
1192         if (!skb->used) 
1193                 break;
1194         skb_unlink(skb);
1195         skb->sk = sk;
1196         kfree_skb(skb, FREE_READ);
1197   }
1198 
1199   restore_flags(flags);
1200 
1201   /*
1202    * FIXME:
1203    * At this point we should send an ack if the difference
1204    * in the window, and the amount of space is bigger than
1205    * TCP_WINDOW_DIFF.
1206    */
1207   DPRINTF((DBG_TCP, "sk->window left = %d, sk->prot->rspace(sk)=%d\n",
1208                         sk->window - sk->bytes_rcv, sk->prot->rspace(sk)));
1209 
1210   if(sk->debug)
1211         printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk),
1212                                             left);
1213   if (sk->prot->rspace(sk) != left) 
1214   {
1215         /*
1216          * This area has caused the most trouble.  The current strategy
1217          * is to simply do nothing if the other end has room to send at
1218          * least 3 full packets, because the ack from those will auto-
1219          * matically update the window.  If the other end doesn't think
 1220          * we have much space left, but we have room for at least 1 more
 1221          * complete packet than it thinks we do, we will send an ack
 1222          * immediately.  Otherwise we will wait up to .5 seconds in case
1223          * the user reads some more.
1224          */
1225         sk->ack_backlog++;
1226 /*
1227  * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1228  * if the other end is offering a window smaller than the agreed on MSS
1229  * (called sk->mtu here).  In theory there's no connection between send
1230  * and receive, and so no reason to think that they're going to send
1231  * small packets.  For the moment I'm using the hack of reducing the mss
1232  * only on the send side, so I'm putting mtu here.
1233  */
1234         if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) {
1235                 /* Send an ack right now. */
1236                 tcp_read_wakeup(sk);
1237         } else {
1238                 /* Force it to send an ack soon. */
1239                 int was_active = del_timer(&sk->timer);
1240                 if (!was_active || TCP_ACK_TIME < sk->timer.expires) {
1241                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1242                 } else
1243                         add_timer(&sk->timer);
1244         }
1245   }
1246 } 
1247 
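/*
 * Editorial sketch, not part of the original file: the window-update
 * policy described above in one expression.  If reading has freed at
 * least one MTU of receive space beyond what the peer already believes
 * it has, ack immediately so the sender keeps moving; otherwise let the
 * delayed-ack timer handle it.  Helper name invented for illustration.
 */
static __inline__ int example_window_update_ack_now(int rspace,
                                                    int advertised_window,
                                                    int bytes_rcv, int mtu)
{
  /* Mirrors: rspace > (sk->window - sk->bytes_rcv + sk->mtu) above. */
  return rspace > advertised_window - bytes_rcv + mtu;
}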
1248 
1249 /* Handle reading urgent data. */
1250 static int
1251 tcp_read_urg(struct sock * sk, int nonblock,
1252              unsigned char *to, int len, unsigned flags)
1253 {
1254   int copied = 0;
1255   struct sk_buff *skb;
1256 
1257   DPRINTF((DBG_TCP, "tcp_read_urg(sk=%X, to=%X, len=%d, flags=%X)\n",
1258                                         sk, to, len, flags));
1259 
1260   while(len > 0) 
1261   {
1262         sk->inuse = 1;
1263         while(sk->urg==0 || skb_peek(&sk->rqueue) == NULL) {
1264                 if (sk->err) {
1265                         int tmp;
1266 
1267                         release_sock(sk);
1268                         if (copied) return(copied);
1269                         tmp = -sk->err;
1270                         sk->err = 0;
1271                         return(tmp);
1272                 }
1273 
1274                 if (sk->state == TCP_CLOSE || sk->done) {
1275                         release_sock(sk);
1276                         if (copied) return(copied);
1277                         if (!sk->done) {
1278                                 sk->done = 1;
1279                                 return(0);
1280                         }
1281                         return(-ENOTCONN);
1282                 }
1283                  
1284                 if (sk->shutdown & RCV_SHUTDOWN) {
1285                         release_sock(sk);
1286                         if (copied == 0) 
1287                                 sk->done = 1;
1288                         return(copied);
1289                 }
1290 
1291                 if (nonblock || copied) {
1292                         release_sock(sk);
1293                         if (copied) return(copied);
1294                         return(-EAGAIN);
1295                 }
1296 
1297                 /* Now at this point, we may have gotten some data. */
1298                 release_sock(sk);
1299                 cli();
1300                 if ((sk->urg == 0 || skb_peek(&sk->rqueue) == NULL) &&
1301                     sk->err == 0 && !(sk->shutdown & RCV_SHUTDOWN)) {
1302                         interruptible_sleep_on(sk->sleep);
1303                         if (current->signal & ~current->blocked) {
1304                                 sti();
1305                                 if (copied) return(copied);
1306                                 return(-ERESTARTSYS);
1307                         }
1308                 }
1309                 sk->inuse = 1;
1310                 sti();
1311         }
1312 
1313         skb = skb_peek(&sk->rqueue);
1314         do {
1315                 int amt;
1316 
1317                 if (skb->h.th->urg && !skb->urg_used) {
1318                         if (skb->h.th->urg_ptr == 0) {
1319                                 skb->h.th->urg_ptr = ntohs(skb->len);
1320                         }
1321                         amt = min(ntohs(skb->h.th->urg_ptr),len);
1322                         if(amt)
1323                         {
1324                                 memcpy_tofs(to,(unsigned char *)(skb->h.th) +
1325                                                         skb->h.th->doff*4, amt);
1326                         }
1327 
1328                         if (!(flags & MSG_PEEK)) {
1329                                 skb->urg_used = 1;
1330                                 sk->urg--;
1331                         }
1332                         release_sock(sk);
1333                         copied += amt;
1334                         return(copied);
1335                 }
1336                 skb =(struct sk_buff *)skb->next;
1337         } while(skb != sk->rqueue);
1338   }
1339 /*sk->urg = 0;*/
1340   release_sock(sk);
1341   return(0);
1342 }
1343 
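/*
 * Editorial userspace sketch, not part of the original file: urgent data
 * is pulled out of band with MSG_OOB, which is the path that ends up in
 * tcp_read_urg() above; SIOCATMARK (see tcp_ioctl) tells a reader when
 * the normal stream has reached the mark.  Guarded out of the build.
 */
#if 0   /* example only */
#include <sys/socket.h>

int example_read_urgent_byte(int fd, char *out)
{
  /* Returns 1 when an urgent byte was copied, 0 or -1 otherwise. */
  return recv(fd, out, 1, MSG_OOB);
}
#endif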
1344 
1345 /* This routine copies from a sock struct into the user buffer. */
1346 static int
1347 tcp_read(struct sock *sk, unsigned char *to,
1348          int len, int nonblock, unsigned flags)
1349 {
1350   int copied=0; /* will be used to say how much has been copied. */
1351   struct sk_buff *skb;
1352   unsigned long offset;
1353   unsigned long used;
1354   int err;
1355 
1356   if (len == 0) return(0);
1357   if (len < 0) {
1358         return(-EINVAL);
1359   }
1360     
1361   err=verify_area(VERIFY_WRITE,to,len);
1362   if(err)
1363         return err;
1364         
1365   /* This error should be checked. */
1366   if (sk->state == TCP_LISTEN) return(-ENOTCONN);
1367 
1368   /* Urgent data needs to be handled specially. */
1369   if ((flags & MSG_OOB)) 
1370         return(tcp_read_urg(sk, nonblock, to, len, flags));
1371 
1372   /* So no-one else will use this socket. */
1373   sk->inuse = 1;
1374   
1375   skb=skb_peek(&sk->rqueue);
1376 
1377   DPRINTF((DBG_TCP, "tcp_read(sk=%X, to=%X, len=%d, nonblock=%d, flags=%X)\n",
1378                                                 sk, to, len, nonblock, flags));
1379 
1380   while(len > 0) {
1381         /* skb->used just checks to see if we've gone all the way around. */
1382         
1383         /* While no data, or first data indicates some is missing, or data is used */
1384         while(skb == NULL ||
1385               before(sk->copied_seq+1, skb->h.th->seq) || skb->used) {
1386                 DPRINTF((DBG_TCP, "skb = %X:\n", skb));
1387                 cleanup_rbuf(sk);
1388                 if (sk->err) 
1389                 {
1390                         int tmp;
1391 
1392                         release_sock(sk);
1393                         if (copied) 
1394                         {
1395                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1396                                                                         copied));
1397                                 return(copied);
1398                         }
1399                         tmp = -sk->err;
1400                         sk->err = 0;
1401                         return(tmp);
1402                 }
1403 
1404                 if (sk->state == TCP_CLOSE) 
1405                 {
1406                         release_sock(sk);
1407                         if (copied) {
1408                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1409                                                                 copied));
1410                                 return(copied);
1411                         }
1412                         if (!sk->done) {
1413                                 sk->done = 1;
1414                                 return(0);
1415                         }
1416                         return(-ENOTCONN);
1417                 }
1418 
1419                 if (sk->shutdown & RCV_SHUTDOWN) 
1420                 {
1421                         release_sock(sk);
1422                         if (copied == 0) sk->done = 1;
1423                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1424                         return(copied);
1425                 }
1426                         
1427                 if (nonblock || copied) 
1428                 {
1429                         release_sock(sk);
1430                         if(sk->debug)
1431                                 printk("read: EAGAIN\n");
1432                         if (copied) 
1433                         {
1434                                 DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1435                                                                 copied));
1436                                 return(copied);
1437                         }
1438                         return(-EAGAIN);
1439                 }
1440 
1441                 if ((flags & MSG_PEEK) && copied != 0) 
1442                 {
1443                         release_sock(sk);
1444                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1445                         return(copied);
1446                 }
1447                  
1448                 DPRINTF((DBG_TCP, "tcp_read about to sleep. state = %d\n",
1449                                                                 sk->state));
1450                 release_sock(sk);
1451 
1452                 /*
1453                  * Now we may have some data waiting or we could
1454                  * have changed state.
1455                  */
1456                 cli();
1457                 if (sk->shutdown & RCV_SHUTDOWN || sk->err != 0) {
1458                         sk->inuse = 1;
1459                         sti();
1460                         continue;
1461                 }
1462 
1463                 if (skb_peek(&sk->rqueue) == NULL ||
1464                     before(sk->copied_seq+1, sk->rqueue->h.th->seq)) {
1465                         if(sk->debug)
1466                                 printk("Read wait sleep\n");
1467                         interruptible_sleep_on(sk->sleep);
1468                         if(sk->debug)
1469                                 printk("Read wait wakes\n");
1470                         if (current->signal & ~current->blocked) {
1471                                 sti();
1472                                 if (copied) {
1473                                         DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
1474                                                                 copied));
1475                                         return(copied);
1476                                 }
1477                                 return(-ERESTARTSYS);
1478                         }
1479                 }
1480                 sk->inuse = 1;
1481                 sti();
1482                 DPRINTF((DBG_TCP, "tcp_read woke up. \n"));
1483 
1484 
1485                 skb=skb_peek(&sk->rqueue);
1486                 /* That may have been null if we were beaten, if so we loop again */
1487         }
1488 
1489         /*
1490          * Copy anything from the current block that needs
1491          * to go into the user buffer.
1492          */
1493          offset = sk->copied_seq+1 - skb->h.th->seq;
1494   
1495          if (skb->h.th->syn) offset--;
1496          if (offset < skb->len) /* Some of the packet is useful */
1497          {
1498                 /*
1499                  * If there is urgent data we must either
1500                  * return or skip over it.
1501                  */
1502                 if (skb->h.th->urg) 
1503                 {
1504                         if (skb->urg_used) 
1505                         {
1506                                 sk->copied_seq += ntohs(skb->h.th->urg_ptr);
1507                                 offset += ntohs(skb->h.th->urg_ptr);
1508                                 if (offset >= skb->len) 
1509                                 {
1510                                         skb->used = 1;
1511                                         skb =(struct sk_buff *)skb->next;
1512                                         continue;
1513                                 }
1514                         } 
1515                         else 
1516                         {
1517                                 release_sock(sk);
1518                                 if (copied) 
1519                                         return(copied);
1520                                 send_sig(SIGURG, current, 0);
1521                                 return(-EINTR);
1522                         }
1523                 }
1524                 /* Ok so how much can we use ? */
1525                 used = min(skb->len - offset, len);
1526                 /* Copy it */
1527                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1528                             skb->h.th->doff*4 + offset, used);
1529                 copied += used;
1530                 len -= used;
1531                 to += used;
1532                 
1533                 /* If we were reading, the data is 'eaten' */
1534                 if (!(flags & MSG_PEEK)) 
1535                         sk->copied_seq += used;
1536               
1537                 /*
1538                  * Mark this data used if we are really reading it,
1539                  * if it doesn't contain any urgent data, and if we
1540                  * have used all the data.
1541                  */
1542                 if (!(flags & MSG_PEEK) &&
1543                    (!skb->h.th->urg || skb->urg_used) &&
1544                    (used + offset >= skb->len)) 
1545                         skb->used = 1;
1546               
1547                 /*
1548                  * See if this is the end of a message or if the
1549                  * remaining data is urgent.
1550                  */
1551                 if (/*skb->h.th->psh || */skb->h.th->urg) 
1552                 {
1553                         break;
1554                 }
1555         } 
1556         else 
1557         {       /* already used this data, must be a retransmit */
1558                 skb->used = 1;
1559         }
1560         /* Move along a packet */
1561         skb =(struct sk_buff *)skb->next;
1562   }
1563   /* Clean up data we have read: This will do ACK frames */
1564   cleanup_rbuf(sk);
1565   release_sock(sk);
1566   DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1567   if (copied == 0 && nonblock) 
1568         return(-EAGAIN);
1569   return(copied);
1570 }
1571 
1572   
1573 /*
1574  * Send a FIN without closing the connection.
1575  * Not called at interrupt time.
1576  */
1577 void
1578 tcp_shutdown(struct sock *sk, int how)
1579 {
1580   struct sk_buff *buff;
1581   struct tcphdr *t1, *th;
1582   struct proto *prot;
1583   int tmp;
1584   struct device *dev = NULL;
1585 
1586   /*
1587    * We need to grab some memory, and put together a FIN,
1588    * and then put it into the queue to be sent.
1589    * FIXME:
1590    *    Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1591    *    Most of this is guesswork, so maybe it will work...
1592    */
1593   /* If we've already sent a FIN, return. */
1594   if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) return;
1595   if (!(how & SEND_SHUTDOWN)) return;
1596   sk->inuse = 1;
1597 
1598   /* Clear out any half completed packets. */
1599   if (sk->partial)
1600         tcp_send_partial(sk);
1601 
1602   prot =(struct proto *)sk->prot;
1603   th =(struct tcphdr *)&sk->dummy_th;
1604   release_sock(sk); /* in case the malloc sleeps. */
1605   buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1606   if (buff == NULL) return;
1607   sk->inuse = 1;
1608 
1609   DPRINTF((DBG_TCP, "tcp_shutdown_send buff = %X\n", buff));
1610   buff->mem_addr = buff;
1611   buff->mem_len = MAX_RESET_SIZE;
1612   buff->sk = sk;
1613   buff->len = sizeof(*t1);
1614   t1 =(struct tcphdr *) buff->data;
1615 
1616   /* Put in the IP header and routing stuff. */
1617   tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1618                            IPPROTO_TCP, sk->opt,
1619                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1620   if (tmp < 0) {
1621         buff->free=1;
1622         prot->wfree(sk,buff->mem_addr, buff->mem_len);
1623         release_sock(sk);
1624         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
1625         return;
1626   }
1627 
1628   t1 =(struct tcphdr *)((char *)t1 +tmp);
1629   buff->len += tmp;
1630   buff->dev = dev;
1631   memcpy(t1, th, sizeof(*t1));
1632   t1->seq = ntohl(sk->send_seq);
1633   sk->send_seq++;
1634   buff->h.seq = sk->send_seq;
1635   t1->ack = 1;
1636   t1->ack_seq = ntohl(sk->acked_seq);
1637   t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1638   t1->fin = 1;
1639   t1->rst = 0;
1640   t1->doff = sizeof(*t1)/4;
1641   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1642 
1643   /*
1644    * Can't just queue this up.
1645    * It should go at the end of the write queue.
1646    */
1647   if (sk->wback != NULL) {
1648         buff->free=0;   
1649         buff->next = NULL;
1650         sk->wback->next = buff;
1651         sk->wback = buff;
1652         buff->magic = TCP_WRITE_QUEUE_MAGIC;
1653   } else {
1654         sk->prot->queue_xmit(sk, dev, buff, 0);
1655   }
1656 
1657   if (sk->state == TCP_ESTABLISHED) sk->state = TCP_FIN_WAIT1;
1658     else sk->state = TCP_FIN_WAIT2;
1659 
1660   release_sock(sk);
1661 }
1662 
1663 
1664 static int
1665 tcp_recvfrom(struct sock *sk, unsigned char *to,
1666              int to_len, int nonblock, unsigned flags,
1667              struct sockaddr_in *addr, int *addr_len)
1668 {
1669   struct sockaddr_in sin;
1670   int len;
1671   int err;
1672   int result;
1673   
1674   /* Have to check these first, unlike the old code. If we
1675      checked them after the read, we would lose data on an
1676      error, which is wrong. */
1677   err = verify_area(VERIFY_WRITE,addr_len,sizeof(long));
1678   if(err)
1679         return err;
1680   len = get_fs_long(addr_len);
1681   if(len > sizeof(sin))
1682         len = sizeof(sin);
1683   err=verify_area(VERIFY_WRITE, addr, len);  
1684   if(err)
1685         return err;
1686         
1687   result=tcp_read(sk, to, to_len, nonblock, flags);
1688 
1689   if (result < 0) return(result);
1690   
1691   sin.sin_family = AF_INET;
1692   sin.sin_port = sk->dummy_th.dest;
1693   sin.sin_addr.s_addr = sk->daddr;
1694 
1695   memcpy_tofs(addr, &sin, len);
1696   put_fs_long(len, addr_len);
1697   return(result);
1698 }
1699 
1700 
1701 /* This routine will send an RST to the other tcp. */
1702 static void
1703 tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
1704           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1705 {
1706   struct sk_buff *buff;
1707   struct tcphdr *t1;
1708   int tmp;
1709 
1710   /*
1711    * We need to grab some memory, and put together an RST,
1712    * and then put it into the queue to be sent.
1713    */
1714   buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1715   if (buff == NULL) 
1716         return;
1717 
1718   DPRINTF((DBG_TCP, "tcp_reset buff = %X\n", buff));
1719   buff->mem_addr = buff;
1720   buff->mem_len = MAX_RESET_SIZE;
1721   buff->len = sizeof(*t1);
1722   buff->sk = NULL;
1723   buff->dev = dev;
1724 
1725   t1 =(struct tcphdr *) buff->data;
1726 
1727   /* Put in the IP header and routing stuff. */
1728   tmp = prot->build_header(buff, saddr, daddr, &dev, IPPROTO_TCP, opt,
1729                            sizeof(struct tcphdr),tos,ttl);
1730   if (tmp < 0) {
1731         buff->free = 1;
1732         prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1733         return;
1734   }
1735   t1 =(struct tcphdr *)((char *)t1 +tmp);
1736   buff->len += tmp;
1737   memcpy(t1, th, sizeof(*t1));
1738 
1739   /* Swap the send and the receive. */
1740   t1->dest = th->source;
1741   t1->source = th->dest;
1742   t1->rst = 1;  
1743   t1->window = 0;
1744   
1745   if(th->ack)
1746   {
1747         t1->ack=0;
1748         t1->seq=th->ack_seq;
1749         t1->ack_seq=0;
1750   }
1751   else
1752   {
1753         t1->ack=1;
1754         if(!th->syn)
1755                 t1->ack_seq=htonl(th->seq);
1756         else
1757                 t1->ack_seq=htonl(th->seq+1);
1758         t1->seq=0;
1759   }
1760 
1761   t1->syn = 0;
1762   t1->urg = 0;
1763   t1->fin = 0;
1764   t1->psh = 0;
1765   t1->doff = sizeof(*t1)/4;
1766   tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1767   prot->queue_xmit(NULL, dev, buff, 1);
1768 }
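/*
 * Illustrative note (not from the original source): the reset rules above
 * follow the usual RFC 793 pattern.  Assuming an incoming segment with
 * seq=1000 and no ACK bit, the reset goes out with ack=1, ack_seq=1000
 * (1001 if the segment carried SYN) and seq=0.  If the incoming segment
 * did carry an ACK of 5000, the reset instead goes out with seq=5000 and
 * the ACK bit clear.
 */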
1769 
1770 
1771 /*
1772  *      Look for tcp options. Parses everything but only knows about MSS.
1773  *      This routine is called for the packet containing the SYN;
1774  *      however, it may also be called with the ACK to the SYN, so you
1775  *      can't assume the segment is always the SYN.  It's always called after
1776  *      we have set up sk->mtu to our own MTU.
1777  */
1778  
1779 static void
1780 tcp_options(struct sock *sk, struct tcphdr *th)
1781 {
1782   unsigned char *ptr;
1783   int length=(th->doff*4)-sizeof(struct tcphdr);
1784   int mss_seen = 0;
1785     
1786   ptr = (unsigned char *)(th + 1);
1787   
1788   while(length>0)
1789   {
1790         int opcode=*ptr++;
1791         int opsize=*ptr++;
1792         switch(opcode)
1793         {
1794                 case TCPOPT_EOL:
1795                         return;
1796                 case TCPOPT_NOP:
1797                         length-=2;
1798                         continue;
1799                 
1800                 default:
1801                         if(opsize<=2)   /* Avoid silly options looping forever */
1802                                 return;
1803                         switch(opcode)
1804                         {
1805                                 case TCPOPT_MSS:
1806                                         if(opsize==4 && th->syn)
1807                                         {
1808                                                 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1809                                                 mss_seen = 1;
1810                                         }
1811                                         break;
1812                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1813                         }
1814                         ptr+=opsize-2;
1815                         length-=opsize;
1816         }
1817   }
1818   if (th->syn) {
1819     if (! mss_seen)
1820       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1821   }
1822   sk->mss = min(sk->max_window, sk->mtu);
1823 }
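/*
 * Worked example for the parser above (illustrative, values assumed): an
 * MSS option arrives as the four bytes 0x02 0x04 0x05 0xb4, i.e. kind=2,
 * length=4, value=1460.  With our own sk->mtu already at 1460 the min()
 * leaves it unchanged; had the peer advertised 536 we would drop to 536.
 * If no MSS option is present on the SYN, sk->mtu is clamped to at most
 * 536, and sk->mss is then further limited by the largest window the peer
 * has offered so far.
 */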
1824 
1825 static inline unsigned long default_mask(unsigned long dst)
1826 {
1827         dst = ntohl(dst);
1828         if (IN_CLASSA(dst))
1829                 return htonl(IN_CLASSA_NET);
1830         if (IN_CLASSB(dst))
1831                 return htonl(IN_CLASSB_NET);
1832         return htonl(IN_CLASSC_NET);
1833 }
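/*
 * Illustrative examples of the classful masks returned above: 10.1.2.3
 * (class A) yields 255.0.0.0, 172.16.4.5 (class B) yields 255.255.0.0,
 * and 192.168.9.9 (class C) yields 255.255.255.0.  The result is used
 * below to guess whether the peer is on a local subnet when
 * SUBNETSARELOCAL is defined.
 */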
1834 
1835 /*
1836  * This routine handles a connection request.
1837  * It should make sure we haven't already responded.
1838  * Because of the way BSD works, we have to send a syn/ack now.
1839  * This also means it will be harder to close a socket which is
1840  * listening.
1841  */
1842 static void
1843 tcp_conn_request(struct sock *sk, struct sk_buff *skb,
1844                  unsigned long daddr, unsigned long saddr,
1845                  struct options *opt, struct device *dev)
1846 {
1847   struct sk_buff *buff;
1848   struct tcphdr *t1;
1849   unsigned char *ptr;
1850   struct sock *newsk;
1851   struct tcphdr *th;
1852   int tmp;
1853 
1854   DPRINTF((DBG_TCP, "tcp_conn_request(sk = %X, skb = %X, daddr = %X, saddr = %X, \n"
1855           "                  opt = %X, dev = %X)\n",
1856           sk, skb, daddr, saddr, opt, dev));
1857   
1858   th = skb->h.th;
1859 
1860   /* If the socket is dead, don't accept the connection. */
1861   if (!sk->dead) {
1862         sk->data_ready(sk,0);
1863   } else {
1864         DPRINTF((DBG_TCP, "tcp_conn_request on dead socket\n"));
1865         tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1866         kfree_skb(skb, FREE_READ);
1867         return;
1868   }
1869 
1870   /*
1871    * Make sure we can accept more.  This will prevent a
1872    * flurry of syns from eating up all our memory.
1873    */
1874   if (sk->ack_backlog >= sk->max_ack_backlog) {
1875         kfree_skb(skb, FREE_READ);
1876         return;
1877   }
1878 
1879   /*
1880    * We need to build a new sock struct.
1881    * It is sort of bad to have a socket without an inode attached
1882    * to it, but the wake_up's will just wake up the listening socket,
1883    * and if the listening socket is destroyed before this is taken
1884    * off of the queue, this will take care of it.
1885    */
1886   newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1887   if (newsk == NULL) {
1888         /* just ignore the syn.  It will get retransmitted. */
1889         kfree_skb(skb, FREE_READ);
1890         return;
1891   }
1892 
1893   DPRINTF((DBG_TCP, "newsk = %X\n", newsk));
1894   memcpy((void *)newsk,(void *)sk, sizeof(*newsk));
1895   newsk->wback = NULL;
1896   newsk->wfront = NULL;
1897   newsk->rqueue = NULL;
1898   newsk->send_head = NULL;
1899   newsk->send_tail = NULL;
1900   newsk->back_log = NULL;
1901   newsk->rtt = TCP_CONNECT_TIME << 3;
1902   newsk->rto = TCP_CONNECT_TIME;
1903   newsk->mdev = 0;
1904   newsk->max_window = 0;
1905   newsk->cong_window = 1;
1906   newsk->cong_count = 0;
1907   newsk->ssthresh = 0;
1908   newsk->backoff = 0;
1909   newsk->blog = 0;
1910   newsk->intr = 0;
1911   newsk->proc = 0;
1912   newsk->done = 0;
1913   newsk->partial = NULL;
1914   newsk->pair = NULL;
1915   newsk->wmem_alloc = 0;
1916   newsk->rmem_alloc = 0;
1917 
1918   newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1919 
1920   newsk->err = 0;
1921   newsk->shutdown = 0;
1922   newsk->ack_backlog = 0;
1923   newsk->acked_seq = skb->h.th->seq+1;
1924   newsk->fin_seq = skb->h.th->seq;
1925   newsk->copied_seq = skb->h.th->seq;
1926   newsk->state = TCP_SYN_RECV;
1927   newsk->timeout = 0;
1928   newsk->send_seq = jiffies * SEQ_TICK - seq_offset;
1929   newsk->window_seq = newsk->send_seq;
1930   newsk->rcv_ack_seq = newsk->send_seq;
1931   newsk->urg =0;
1932   newsk->retransmits = 0;
1933   newsk->destroy = 0;
1934   newsk->timer.data = (unsigned long)newsk;
1935   newsk->timer.function = &net_timer;
1936   newsk->dummy_th.source = skb->h.th->dest;
1937   newsk->dummy_th.dest = skb->h.th->source;
1938 
1939   /* Swap these two, they are from our point of view. */
1940   newsk->daddr = saddr;
1941   newsk->saddr = daddr;
1942 
1943   put_sock(newsk->num,newsk);
1944   newsk->dummy_th.res1 = 0;
1945   newsk->dummy_th.doff = 6;
1946   newsk->dummy_th.fin = 0;
1947   newsk->dummy_th.syn = 0;
1948   newsk->dummy_th.rst = 0;
1949   newsk->dummy_th.psh = 0;
1950   newsk->dummy_th.ack = 0;
1951   newsk->dummy_th.urg = 0;
1952   newsk->dummy_th.res2 = 0;
1953   newsk->acked_seq = skb->h.th->seq + 1;
1954   newsk->copied_seq = skb->h.th->seq;
1955 
1956   /* Grab the ttl and tos values and use them */
1957   newsk->ip_ttl=sk->ip_ttl;
1958   newsk->ip_tos=skb->ip_hdr->tos;
1959 
1960 /* use 512 or whatever user asked for */
1961 /* note use of sk->user_mss, since user has no direct access to newsk */
1962   if (sk->user_mss)
1963     newsk->mtu = sk->user_mss;
1964   else {
1965 #ifdef SUBNETSARELOCAL
1966     if ((saddr ^ daddr) & default_mask(saddr))
1967 #else
1968     if ((saddr ^ daddr) & dev->pa_mask)
1969 #endif
1970       newsk->mtu = 576 - HEADER_SIZE;
1971     else
1972       newsk->mtu = MAX_WINDOW;
1973   }
1974 /* but not bigger than device MTU */
1975   newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
1976 
1977 /* this will min with what arrived in the packet */
1978   tcp_options(newsk,skb->h.th);
1979 
1980   buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
1981   if (buff == NULL) {
1982         sk->err = -ENOMEM;
1983         newsk->dead = 1;
1984         release_sock(newsk);
1985         kfree_skb(skb, FREE_READ);
1986         return;
1987   }
1988   
1989   buff->mem_addr = buff;
1990   buff->mem_len = MAX_SYN_SIZE;
1991   buff->len = sizeof(struct tcphdr)+4;
1992   buff->sk = newsk;
1993   
1994   t1 =(struct tcphdr *) buff->data;
1995 
1996   /* Put in the IP header and routing stuff. */
1997   tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &dev,
1998                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
1999 
2000   /* Something went wrong. */
2001   if (tmp < 0) {
2002         sk->err = tmp;
2003         buff->free=1;
2004         kfree_skb(buff,FREE_WRITE);
2005         newsk->dead = 1;
2006         release_sock(newsk);
2007         skb->sk = sk;
2008         kfree_skb(skb, FREE_READ);
2009         return;
2010   }
2011 
2012   buff->len += tmp;
2013   t1 =(struct tcphdr *)((char *)t1 +tmp);
2014   
2015   memcpy(t1, skb->h.th, sizeof(*t1));
2016   buff->h.seq = newsk->send_seq;
2017 
2018   /* Swap the send and the receive. */
2019   t1->dest = skb->h.th->source;
2020   t1->source = newsk->dummy_th.source;
2021   t1->seq = ntohl(newsk->send_seq++);
2022   t1->ack = 1;
2023   newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2024   t1->window = ntohs(newsk->window);
2025   t1->res1 = 0;
2026   t1->res2 = 0;
2027   t1->rst = 0;
2028   t1->urg = 0;
2029   t1->psh = 0;
2030   t1->syn = 1;
2031   t1->ack_seq = ntohl(skb->h.th->seq+1);
2032   t1->doff = sizeof(*t1)/4+1;
2033 
2034   ptr =(unsigned char *)(t1+1);
2035   ptr[0] = 2;
2036   ptr[1] = 4;
2037   ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2038   ptr[3] =(newsk->mtu) & 0xff;
2039 
2040   tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2041   newsk->prot->queue_xmit(newsk, dev, buff, 0);
2042 
2043   reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_CONNECT_TIME);
2044   skb->sk = newsk;
2045 
2046   /* Charge the sock_buff to newsk. */
2047   sk->rmem_alloc -= skb->mem_len;
2048   newsk->rmem_alloc += skb->mem_len;
2049 
2050   skb_queue_tail(&sk->rqueue,skb);
2051   sk->ack_backlog++;
2052   release_sock(newsk);
2053 }
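/*
 * Rough sketch of the resulting SYN/ACK (illustrative, values assumed):
 * with newsk->mtu clamped to 1460 the four option bytes written above are
 * 0x02 0x04 0x05 0xb4 (kind=2, len=4, MSS=1460), doff becomes 6 (a 24 byte
 * header), SYN and ACK are both set, and ack_seq acknowledges the peer's
 * SYN, i.e. their initial seq plus one.
 */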
2054 
2055 
2056 static void
2057 tcp_close(struct sock *sk, int timeout)
2058 {
2059   struct sk_buff *buff;
2060   int need_reset = 0;
2061   struct tcphdr *t1, *th;
2062   struct proto *prot;
2063   struct device *dev=NULL;
2064   int tmp;
2065 
2066   /*
2067    * We need to grab some memory, and put together a FIN,
2068    * and then put it into the queue to be sent.
2069    */
2070   DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));
2071   sk->inuse = 1;
2072   sk->keepopen = 1;
2073   sk->shutdown = SHUTDOWN_MASK;
2074 
2075   if (!sk->dead) 
2076         sk->state_change(sk);
2077 
2078   /* We need to flush the recv. buffs. */
2079   if (skb_peek(&sk->rqueue) != NULL) 
2080   {
2081         struct sk_buff *skb;
2082         if(sk->debug)
2083                 printk("Clean rcv queue\n");
2084         while((skb=skb_dequeue(&sk->rqueue))!=NULL)
2085         {
2086                 if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
2087                                 need_reset = 1;
2088                 kfree_skb(skb, FREE_READ);
2089         }
2090         if(sk->debug)
2091                 printk("Cleaned.\n");
2092   }
2093   sk->rqueue = NULL;
2094 
2095   /* Get rid of any half-completed packets. */
2096   if (sk->partial) {
2097         tcp_send_partial(sk);
2098   }
2099 
2100   switch(sk->state) {
2101         case TCP_FIN_WAIT1:
2102         case TCP_FIN_WAIT2:
2103         case TCP_LAST_ACK:
2104                 /* start a timer. */
2105                 /* The original code used 4 * sk->rtt.  In converting to the
2106                  * new rtt representation we can't quite use that;
2107                  * it seems to make most sense to use the backed-off value
2108                  */
2109                 reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2110                 if (timeout) tcp_time_wait(sk);
2111                 release_sock(sk);
2112                 return; /* break causes a double release - messy */
2113         case TCP_TIME_WAIT:
2114                 if (timeout) {
2115                   sk->state = TCP_CLOSE;
2116                 }
2117                 release_sock(sk);
2118                 return;
2119         case TCP_LISTEN:
2120                 sk->state = TCP_CLOSE;
2121                 release_sock(sk);
2122                 return;
2123         case TCP_CLOSE:
2124                 release_sock(sk);
2125                 return;
2126         case TCP_CLOSE_WAIT:
2127         case TCP_ESTABLISHED:
2128         case TCP_SYN_SENT:
2129         case TCP_SYN_RECV:
2130                 prot =(struct proto *)sk->prot;
2131                 th =(struct tcphdr *)&sk->dummy_th;
2132                 buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2133                 if (buff == NULL) {
2134                         /* This will force it to try again later. */
2135                         /* Or it would have if someone released the socket
2136                            first. Anyway it might work now */
2137                         release_sock(sk);
2138                         if (sk->state != TCP_CLOSE_WAIT)
2139                                         sk->state = TCP_ESTABLISHED;
2140                         reset_timer(sk, TIME_CLOSE, 100);
2141                         return;
2142                 }
2143                 buff->mem_addr = buff;
2144                 buff->mem_len = MAX_FIN_SIZE;
2145                 buff->sk = sk;
2146                 buff->free = 1;
2147                 buff->len = sizeof(*t1);
2148                 t1 =(struct tcphdr *) buff->data;
2149 
2150                 /* Put in the IP header and routing stuff. */
2151                 tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2152                                          IPPROTO_TCP, sk->opt,
2153                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2154                 if (tmp < 0) {
2155                         kfree_skb(buff,FREE_WRITE);
2156                         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
2157                         release_sock(sk);
2158                         return;
2159                 }
2160 
2161                 t1 =(struct tcphdr *)((char *)t1 +tmp);
2162                 buff->len += tmp;
2163                 buff->dev = dev;
2164                 memcpy(t1, th, sizeof(*t1));
2165                 t1->seq = ntohl(sk->send_seq);
2166                 sk->send_seq++;
2167                 buff->h.seq = sk->send_seq;
2168                 t1->ack = 1;
2169 
2170                 /* Ack everything immediately from now on. */
2171                 sk->delay_acks = 0;
2172                 t1->ack_seq = ntohl(sk->acked_seq);
2173                 t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2174                 t1->fin = 1;
2175                 t1->rst = need_reset;
2176                 t1->doff = sizeof(*t1)/4;
2177                 tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2178 
2179                 if (sk->wfront == NULL) {
2180                         prot->queue_xmit(sk, dev, buff, 0);
2181                 } else {
2182                         reset_timer(sk, TIME_WRITE, sk->rto);
2183                         buff->next = NULL;
2184                         if (sk->wback == NULL) {
2185                                 sk->wfront = buff;
2186                         } else {
2187                                 sk->wback->next = buff;
2188                         }
2189                         sk->wback = buff;
2190                         buff->magic = TCP_WRITE_QUEUE_MAGIC;
2191                 }
2192 
2193                 if (sk->state == TCP_CLOSE_WAIT) {
2194                         sk->state = TCP_FIN_WAIT2;
2195                 } else {
2196                         sk->state = TCP_FIN_WAIT1;
2197                 }
2198   }
2199   release_sock(sk);
2200 }
2201 
2202 
2203 /*
2204  * This routine takes stuff off of the write queue,
2205  * and puts it in the xmit queue.
2206  */
2207 static void
2208 tcp_write_xmit(struct sock *sk)
2209 {
2210   struct sk_buff *skb;
2211 
2212   DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));
2213 
2214   /* The bytes will have to remain here. In time closedown will
2215      empty the write queue and all will be happy */
2216   if(sk->zapped)
2217         return;
2218 
2219   while(sk->wfront != NULL &&
2220         before(sk->wfront->h.seq, sk->window_seq +1) &&
2221         (sk->retransmits == 0 ||
2222          sk->timeout != TIME_WRITE ||
2223          before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
2224         && sk->packets_out < sk->cong_window) {
2225                 skb = sk->wfront;
2226                 IS_SKB(skb);
2227                 sk->wfront = skb->next;
2228                 if (sk->wfront == NULL) sk->wback = NULL;
2229                 skb->next = NULL;
2230                 if (skb->magic != TCP_WRITE_QUEUE_MAGIC) {
2231                         printk("tcp.c skb with bad magic(%X) on write queue. Squashing "
2232                                 "queue\n", skb->magic);
2233                         sk->wfront = NULL;
2234                         sk->wback = NULL;
2235                         return;
2236                 }
2237                 skb->magic = 0;
2238                 DPRINTF((DBG_TCP, "Sending a packet.\n"));
2239 
2240                 /* See if we really need to send the packet. */
2241                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) {
2242                         sk->retransmits = 0;
2243                         kfree_skb(skb, FREE_WRITE);
2244                         if (!sk->dead) sk->write_space(sk);
2245                 } else {
2246                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2247                 }
2248         }
2249 }
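/*
 * Worked example of the send test above (values assumed, sk->retransmits
 * taken as zero): with window_seq=5000, rcv_ack_seq=4000, cong_window=2
 * and packets_out=1, a queued skb with h.seq=4500 is transmitted, since it
 * fits both the offered window and the congestion window, while one with
 * h.seq=5500 stays on the write queue until an ack moves window_seq past it.
 */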
2250 
2251 
2252 /*
2253  * This routine sorts the send list, and resets the
2254  * sk->send_head and sk->send_tail pointers.
2255  */
2256 void
2257 sort_send(struct sock *sk)
2258 {
2259   struct sk_buff *list = NULL;
2260   struct sk_buff *skb,*skb2,*skb3;
2261 
2262   for (skb = sk->send_head; skb != NULL; skb = skb2) {
2263         skb2 = (struct sk_buff *)skb->link3;
2264         if (list == NULL || before (skb2->h.seq, list->h.seq)) {
2265                 skb->link3 = list;
2266                 sk->send_tail = skb;
2267                 list = skb;
2268         } else {
2269                 for (skb3 = list; ; skb3 = (struct sk_buff *)skb3->link3) {
2270                         if (skb3->link3 == NULL ||
2271                             before(skb->h.seq, skb3->link3->h.seq)) {
2272                                 skb->link3 = skb3->link3;
2273                                 skb3->link3 = skb;
2274                                 if (skb->link3 == NULL) sk->send_tail = skb;
2275                                 break;
2276                         }
2277                 }
2278         }
2279   }
2280   sk->send_head = list;
2281 }
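/*
 * Illustrative note: sort_send() is an insertion sort keyed on h.seq.  If
 * the retransmit list has ended up ordered 300, 100, 200 the intent is to
 * rebuild it as 100, 200, 300 with send_head and send_tail pointing at the
 * new first and last entries.  It is only invoked from tcp_ack() when an
 * out-of-order retransmit queue is detected, so this is a recovery path
 * rather than a hot path.
 */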
2282   
2283 
2284 /* This routine deals with incoming acks, but not outgoing ones. */
2285 static int
2286 tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
2287 {
2288   unsigned long ack;
2289   int flag = 0;
2290   /* 
2291    * Bit 1 - there was data in the packet as well as the ack, or new
2292    *         data was sent, or we are in a shutdown state
2293    * Bit 2 - data from the retransmit queue was acked and removed
2294    * Bit 4 - the window shrank, or data from the retransmit queue was acked and removed
2295    */
2296 
2297   if(sk->zapped)
2298         return(1);      /* Dead, can't ack any more so why bother */
2299 
2300   ack = ntohl(th->ack_seq);
2301   DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
2302           "sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
2303           ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
2304 
2305   if (ntohs(th->window) > sk->max_window) {
2306         sk->max_window = ntohs(th->window);
2307         sk->mss = min(sk->max_window, sk->mtu);
2308   }
2309 
2310   if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2311         sk->retransmits = 0;
2312 
2313   if (after(ack, sk->send_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
2314         if (after(ack, sk->send_seq) ||
2315            (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
2316                 return(0);
2317         }
2318         if (sk->keepopen) {
2319                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2320         }
2321         return(1);
2322   }
2323 
2324   if (len != th->doff*4) flag |= 1;
2325 
2326   /* See if our window has been shrunk. */
2327   if (after(sk->window_seq, ack+ntohs(th->window))) {
2328         /*
2329          * We may need to move packets from the send queue
2330          * to the write queue, if the window has been shrunk on us.
2331          * The RFC says you are not allowed to shrink your window
2332          * like this, but if the other end does, you must be able
2333          * to deal with it.
2334          */
2335         struct sk_buff *skb;
2336         struct sk_buff *skb2;
2337         struct sk_buff *wskb = NULL;
2338   
2339         skb2 = sk->send_head;
2340         sk->send_head = NULL;
2341         sk->send_tail = NULL;
2342 
2343         flag |= 4;
2344 
2345         sk->window_seq = ack + ntohs(th->window);
2346         cli();
2347         while (skb2 != NULL) {
2348                 skb = skb2;
2349                 skb2 = (struct sk_buff *)skb->link3;
2350                 skb->link3 = NULL;
2351                 if (after(skb->h.seq, sk->window_seq)) {
2352                         if (sk->packets_out > 0) sk->packets_out--;
2353                         /* We may need to remove this from the dev send list. */
2354                         if (skb->next != NULL) {
2355                                 skb_unlink(skb);                                
2356                         }
2357                         /* Now add it to the write_queue. */
2358                         skb->magic = TCP_WRITE_QUEUE_MAGIC;
2359                         if (wskb == NULL) {
2360                                 skb->next = sk->wfront;
2361                                 sk->wfront = skb;
2362                         } else {
2363                                 skb->next = wskb->next;
2364                                 wskb->next = skb;
2365                         }
2366                         if (sk->wback == wskb) sk->wback = skb;
2367                         wskb = skb;
2368                 } else {
2369                         if (sk->send_head == NULL) {
2370                                 sk->send_head = skb;
2371                                 sk->send_tail = skb;
2372                         } else {
2373                                 sk->send_tail->link3 = skb;
2374                                 sk->send_tail = skb;
2375                         }
2376                         skb->link3 = NULL;
2377                 }
2378         }
2379         sti();
2380   }
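  /*
   * Shrink example (values assumed): if window_seq was 9000 and the ack
   * arrives with ack=6000 and window=1000, window_seq drops to 7000.
   * Every packet on the retransmit queue with h.seq beyond 7000 is taken
   * off any device list it was on and pushed back onto the front of the
   * write queue, to be sent again only once the peer reopens the window.
   */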
2381 
2382   if (sk->send_tail == NULL || sk->send_head == NULL) {
2383         sk->send_head = NULL;
2384         sk->send_tail = NULL;
2385         sk->packets_out= 0;
2386   }
2387 
2388   sk->window_seq = ack + ntohs(th->window);
2389 
2390   /* We don't want too many packets out there. */
2391   if (sk->timeout == TIME_WRITE && 
2392       sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {
2393 /* 
2394  * This is Jacobson's slow start and congestion avoidance. 
2395  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2396  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2397  * counter and increment it once every cwnd times.  It's possible
2398  * that this should be done only if sk->retransmits == 0.  I'm
2399  * interpreting "new data is acked" as including data that has
2400  * been retransmitted but is just now being acked.
2401  */
2402         if (sk->cong_window < sk->ssthresh)  
2403           /* in "safe" area, increase */
2404           sk->cong_window++;
2405         else {
2406           /* in dangerous area, increase slowly.  In theory this is
2407              sk->cong_window += 1 / sk->cong_window
2408            */
2409           if (sk->cong_count >= sk->cong_window) {
2410             sk->cong_window++;
2411             sk->cong_count = 0;
2412           } else 
2413             sk->cong_count++;
2414         }
2415   }
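  /*
   * Growth example (values assumed): with ssthresh=8, cong_window gains a
   * full mss per new ack while below 8 (slow start: 1, 2, 3, ...).  Once
   * it reaches 8, cong_count must climb back up to cong_window before the
   * window grows again, which approximates cwnd += 1/cwnd, i.e. roughly
   * one extra segment per round trip (congestion avoidance).
   */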
2416 
2417   DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
2418   sk->rcv_ack_seq = ack;
2419 
2420   /*
2421    * if this ack opens up a zero window, clear backoff.  It was
2422    * being used to time the probes, and is probably far higher than
2423    * it needs to be for normal retransmission
2424    */
2425   if (sk->timeout == TIME_PROBE0) {
2426         if (sk->wfront != NULL &&   /* should always be non-null */
2427             ! before (sk->window_seq, sk->wfront->h.seq)) {
2428           sk->retransmits = 0;
2429           sk->backoff = 0;
2430           /* recompute rto from rtt.  this eliminates any backoff */
2431           sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2432           if (sk->rto > 120*HZ)
2433             sk->rto = 120*HZ;
2434           if (sk->rto < 1*HZ)
2435             sk->rto = 1*HZ;
2436         }
2437   }
2438 
2439   /* See if we can take anything off of the retransmit queue. */
2440   while(sk->send_head != NULL) {
2441         /* Check for a bug. */
2442         if (sk->send_head->link3 &&
2443             after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
2444                 printk("INET: tcp.c: *** bug send_list out of order.\n");
2445                 sort_send(sk);
2446         }
2447 
2448         if (before(sk->send_head->h.seq, ack+1)) {
2449                 struct sk_buff *oskb;
2450 
2451                 if (sk->retransmits) {
2452 
2453                   /* we were retransmitting.  don't count this in RTT est */
2454                   flag |= 2;
2455 
2456                   /*
2457                    * even though we've gotten an ack, we're still
2458                    * retransmitting as long as we're sending from
2459                    * the retransmit queue.  Keeping retransmits non-zero
2460                    * prevents us from getting new data interspersed with
2461                    * retransmissions.
2462                    */
2463 
2464                   if (sk->send_head->link3)
2465                     sk->retransmits = 1;
2466                   else
2467                     sk->retransmits = 0;
2468 
2469                 }
2470 
2471                 /*
2472                  * Note that we only reset backoff and rto in the
2473                  * rtt recomputation code.  And that doesn't happen
2474                  * if there were retransmissions in effect.  So the
2475                  * first new packet after the retransmissions is
2476                  * sent with the backoff still in effect.  Not until
2477                  * we get an ack from a non-retransmitted packet do
2478                  * we reset the backoff and rto.  This allows us to deal
2479                  * with a situation where the network delay has increased
2480                  * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2481                  */
2482 
2483                 /* We have one less packet out there. */
2484                 if (sk->packets_out > 0) sk->packets_out --;
2485                 DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
2486                                 sk->send_head, sk->send_head->h.seq, ack));
2487 
2488                 /* Wake up the process, it can probably write more. */
2489                 if (!sk->dead) sk->write_space(sk);
2490 
2491                 oskb = sk->send_head;
2492 
2493                 if (!(flag&2)) {
2494                   long m;
2495 
2496                   /* The following amusing code comes from Jacobson's
2497                    * article in SIGCOMM '88.  Note that rtt and mdev
2498                    * are scaled versions of rtt and mean deviation.
2499                    * This is designed to be as fast as possible;
2500                    * m stands for "measurement".
2501                    */
2502 
2503                   m = jiffies - oskb->when;  /* RTT */
2504                   m -= (sk->rtt >> 3);       /* m is now error in rtt est */
2505                   sk->rtt += m;              /* rtt = 7/8 rtt + 1/8 new */
2506                   if (m < 0)
2507                     m = -m;                  /* m is now abs(error) */
2508                   m -= (sk->mdev >> 2);      /* similar update on mdev */
2509                   sk->mdev += m;             /* mdev = 3/4 mdev + 1/4 new */
2510 
2511                   /* now update timeout.  Note that this removes any backoff */
2512                   sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2513                   if (sk->rto > 120*HZ)
2514                     sk->rto = 120*HZ;
2515                   if (sk->rto < 1*HZ)
2516                     sk->rto = 1*HZ;
2517                   sk->backoff = 0;
2518 
2519                 }
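                /*
                 * Numeric example for the estimator above (values assumed):
                 * with sk->rtt=800 (a scaled srtt of 100 ticks), sk->mdev=40
                 * and a new sample m=120 ticks, the error m-(rtt>>3) is 20,
                 * so rtt becomes 820; after m -= (mdev>>2) the deviation
                 * becomes 40+10=50.  The timeout is then
                 * rto = ((rtt>>2)+mdev)>>1 = (205+50)>>1 = 127 ticks,
                 * clamped to the 1*HZ .. 120*HZ range.
                 */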
2520                 flag |= (2|4);
2521 
2522                 cli();
2523 
2524                 oskb = sk->send_head;
2525                 IS_SKB(oskb);
2526                 sk->send_head =(struct sk_buff *)oskb->link3;
2527                 if (sk->send_head == NULL) {
2528                         sk->send_tail = NULL;
2529                 }
2530 
2531                 /* We may need to remove this from the dev send list. */                
2532                 skb_unlink(oskb);       /* Much easier! */
2533                 sti();
2534                 oskb->magic = 0;
2535                 kfree_skb(oskb, FREE_WRITE); /* write. */
2536                 if (!sk->dead) sk->write_space(sk);
2537         } else {
2538                 break;
2539         }
2540   }
2541 
2542   /*
2543    * Maybe we can take some stuff off of the write queue,
2544    * and put it onto the xmit queue.
2545    */
2546   if (sk->wfront != NULL) {
2547         if (after (sk->window_seq+1, sk->wfront->h.seq) &&
2548                 (sk->retransmits == 0 || 
2549                  sk->timeout != TIME_WRITE ||
2550                  before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
2551                 && sk->packets_out < sk->cong_window) {
2552                 flag |= 1;
2553                 tcp_write_xmit(sk);
2554         } else if (before(sk->window_seq, sk->wfront->h.seq) &&
2555                    sk->send_head == NULL &&
2556                    sk->ack_backlog == 0 &&
2557                    sk->state != TCP_TIME_WAIT) {
2558                 reset_timer(sk, TIME_PROBE0, sk->rto);
2559         }               
2560   } else {
2561         if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2562             sk->state != TCP_TIME_WAIT && !sk->keepopen) {
2563                 DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n")); 
2564                 if (!sk->dead) sk->write_space(sk);
2565 
2566                 if (sk->keepopen)
2567                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2568                 else
2569                         delete_timer(sk);
2570         } else {
2571                 if (sk->state != (unsigned char) sk->keepopen) {
2572                         reset_timer(sk, TIME_WRITE, sk->rto);
2573                 }
2574                 if (sk->state == TCP_TIME_WAIT) {
2575                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2576                 }
2577         }
2578   }
2579 
2580   if (sk->packets_out == 0 && sk->partial != NULL &&
2581       sk->wfront == NULL && sk->send_head == NULL) {
2582         flag |= 1;
2583         tcp_send_partial(sk);
2584   }
2585 
2586   /* See if we are done. */
2587   if (sk->state == TCP_TIME_WAIT) {
2588         if (!sk->dead)
2589                 sk->state_change(sk);
2590         if (sk->rcv_ack_seq == sk->send_seq && sk->acked_seq == sk->fin_seq) {
2591                 flag |= 1;
2592                 sk->state = TCP_CLOSE;
2593                 sk->shutdown = SHUTDOWN_MASK;
2594         }
2595   }
2596 
2597   if (sk->state == TCP_LAST_ACK || sk->state == TCP_FIN_WAIT2) {
2598         if (!sk->dead) sk->state_change(sk);
2599         if (sk->rcv_ack_seq == sk->send_seq) {
2600                 flag |= 1;
2601                 if (sk->acked_seq != sk->fin_seq) {
2602                         tcp_time_wait(sk);
2603                 } else {
2604                         DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk));
2605                         tcp_send_ack(sk->send_seq, sk->acked_seq, sk,
2606                                      th, sk->daddr);
2607                         sk->shutdown = SHUTDOWN_MASK;
2608                         sk->state = TCP_CLOSE;
2609                 }
2610         }
2611   }
2612 
2613 /*
2614  * I make no guarantees about the first clause in the following
2615  * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2616  * what conditions "!flag" would be true.  However I think the rest
2617  * of the conditions would prevent that from causing any
2618  * unnecessary retransmission. 
2619  *   Clearly if the first packet has expired it should be 
2620  * retransmitted.  The other alternative, "flag&2 && retransmits", is
2621  * harder to explain:  You have to look carefully at how and when the
2622  * timer is set and with what timeout.  The most recent transmission always
2623  * sets the timer.  So in general if the most recent thing has timed
2624  * out, everything before it has as well.  So we want to go ahead and
2625  * retransmit some more.  If we didn't explicitly test for this
2626  * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2627  * would not be true.  If you look at the pattern of timing, you can
2628  * show that rto is increased fast enough that the next packet would
2629  * almost never be retransmitted immediately.  Then you'd end up
2630  * waiting for a timeout to send each packet on the retransmission
2631  * queue.  With my implementation of the Karn sampling algorithm,
2632  * the timeout would double each time.  The net result is that it would
2633  * take a hideous amount of time to recover from a single dropped packet.
2634  * It's possible that there should also be a test for TIME_WRITE, but
2635  * I think as long as "send_head != NULL" and "retransmit" is on, we've
2636  * got to be in real retransmission mode.
2637  *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2638  * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2639  * As long as no further losses occur, this seems reasonable.
2640  */
2641 
2642   if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2643       (((flag&2) && sk->retransmits) ||
2644        (sk->send_head->when + sk->rto < jiffies))) {
2645         ip_do_retransmit(sk, 1);
2646         reset_timer(sk, TIME_WRITE, sk->rto);
2647       }
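  /*
   * Trigger example (illustrative): assuming the (!flag)||(flag&4) clause
   * holds and the retransmit queue is not empty, we resend either because
   * acked data just came off that queue while sk->retransmits is non-zero
   * (flag&2), or because the oldest unacked packet was sent more than rto
   * jiffies ago.  ip_do_retransmit(sk, 1) then resends the whole queue and
   * the write timer is re-armed with the current backed-off rto.
   */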
2648 
2649   DPRINTF((DBG_TCP, "leaving tcp_ack\n"));
2650   return(1);
2651 }
2652 
2653 
2654 /*
2655  * This routine handles the data.  If there is room in the buffer,
2656  * it will have already been moved into it.  If there is no
2657  * room, then we will just have to discard the packet.
2658  */
2659 static int
2660 tcp_data(struct sk_buff *skb, struct sock *sk, 
2661          unsigned long saddr, unsigned short len)
2662 {
2663   struct sk_buff *skb1, *skb2;
2664   struct tcphdr *th;
2665   int dup_dumped=0;
2666 
2667   th = skb->h.th;
2668   print_th(th);
2669   skb->len = len -(th->doff*4);
2670 
2671   DPRINTF((DBG_TCP, "tcp_data len = %d sk = %X:\n", skb->len, sk));
2672 
2673   sk->bytes_rcv += skb->len;
2674   if (skb->len == 0 && !th->fin && !th->urg && !th->psh) {
2675         /* Don't want to keep passing ack's back and forth. */
2676         if (!th->ack) tcp_send_ack(sk->send_seq, sk->acked_seq,sk, th, saddr);
2677         kfree_skb(skb, FREE_READ);
2678         return(0);
2679   }
2680 
2681   if (sk->shutdown & RCV_SHUTDOWN) {
2682         sk->acked_seq = th->seq + skb->len + th->syn + th->fin;
2683         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2684         sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2685         sk->state = TCP_CLOSE;
2686         sk->err = EPIPE;
2687         sk->shutdown = SHUTDOWN_MASK;
2688         DPRINTF((DBG_TCP, "tcp_data: closing socket - %X\n", sk));
2689         kfree_skb(skb, FREE_READ);
2690         if (!sk->dead) sk->state_change(sk);
2691         return(0);
2692   }
2693 
2694   /*
2695    * Now we have to walk the chain, and figure out where this one
2696    * goes into it.  This is set up so that the last packet we received
2697    * will be the first one we look at, that way if everything comes
2698    * in order, there will be no performance loss, and if they come
2699    * out of order we will be able to fit things in nicely.
2700    */
2701 
2702   /* This should start at the last one, and then go around forwards. */
2703   if (sk->rqueue == NULL) {
2704         DPRINTF((DBG_TCP, "tcp_data: skb = %X:\n", skb));
2705 #ifdef OLDWAY
2706         sk->rqueue = skb;
2707         skb->next = skb;
2708         skb->prev = skb;
2709         skb->list = &sk->rqueue;
2710 #else
2711         skb_queue_head(&sk->rqueue,skb);
2712 #endif          
2713         skb1= NULL;
2714   } else {
2715         DPRINTF((DBG_TCP, "tcp_data adding to chain sk = %X:\n", sk));
2716         for(skb1=sk->rqueue->prev; ; skb1 =(struct sk_buff *)skb1->prev) {
2717                 if(sk->debug)
2718                 {
2719                         printk("skb1=%p :", skb1);
2720                         printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
2721                         printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
2722                         printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
2723                                         sk->acked_seq);
2724                 }
2725 #ifdef OLD              
2726                 if (after(th->seq+1, skb1->h.th->seq)) {
2727                         skb->prev = skb1;
2728                         skb->next = skb1->next;
2729                         skb->next->prev = skb;
2730                         skb1->next = skb;
2731                         if (skb1 == sk->rqueue) sk->rqueue = skb;
2732                         break;
2733                 }
2734                 if (skb1->prev == sk->rqueue) {
2735                         skb->next= skb1;
2736                         skb->prev = skb1->prev;
2737                         skb->prev->next = skb;
2738                         skb1->prev = skb;
2739                         skb1 = NULL; /* so we know we might be able
2740                                         to ack stuff. */
2741                         break;
2742                 }
2743 #else
2744                 if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
2745                 {
2746                         skb_append(skb1,skb);
2747                         skb_unlink(skb1);
2748                         kfree_skb(skb1,FREE_READ);
2749                         dup_dumped=1;
2750                         skb1=NULL;
2751                         break;
2752                 }
2753                 if (after(th->seq+1, skb1->h.th->seq))
2754                 {
2755                         skb_append(skb1,skb);
2756                         break;
2757                 }
2758                 if (skb1 == sk->rqueue)
2759                 {
2760                         skb_queue_head(&sk->rqueue, skb);               
2761                         break;
2762                 }
2763 #endif          
2764         }
2765         DPRINTF((DBG_TCP, "skb = %X:\n", skb));
2766   }
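  /*
   * Queue walk example (sequence numbers assumed): with segments covering
   * 100-199 and 300-399 already on rqueue and a new segment for 200-299
   * arriving, the backwards walk above stops at the 100-199 entry, the
   * first one whose seq is not above the newcomer's, and appends the new
   * skb after it, so readers see 100, 200, 300 in order.  A duplicate that
   * is at least as long as the one already queued replaces it (dup_dumped).
   */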
2767 
2768   th->ack_seq = th->seq + skb->len;
2769   if (th->syn) th->ack_seq++;
2770   if (th->fin) th->ack_seq++;
2771 
2772   if (before(sk->acked_seq, sk->copied_seq)) {
2773         printk("*** tcp.c:tcp_data bug acked < copied\n");
2774         sk->acked_seq = sk->copied_seq;
2775   }
2776 
2777   /* Now figure out if we can ack anything. */
2778   if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) {
2779       if (before(th->seq, sk->acked_seq+1)) {
2780                 if (after(th->ack_seq, sk->acked_seq))
2781                                         sk->acked_seq = th->ack_seq;
2782                 skb->acked = 1;
2783 
2784                 /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */
2785                 if (skb->h.th->fin) {
2786                         if (!sk->dead) sk->state_change(sk);
2787                         sk->shutdown |= RCV_SHUTDOWN;
2788                 }
2789           
2790                 for(skb2 = (struct sk_buff *)skb->next;
2791                     skb2 !=(struct sk_buff *) sk->rqueue;
2792                     skb2 = (struct sk_buff *)skb2->next) {
2793                         if (before(skb2->h.th->seq, sk->acked_seq+1)) {
2794                                 if (after(skb2->h.th->ack_seq, sk->acked_seq))
2795                                 {
2796                                         long old_acked_seq = sk->acked_seq;
2797                                         sk->acked_seq = skb2->h.th->ack_seq;
2798                                         if((int)(sk->acked_seq - old_acked_seq) >0)
2799                                         {
2800                                                 int new_window=sk->window-sk->acked_seq+
2801                                                         old_acked_seq;
2802                                                 if(new_window<0)
2803                                                         new_window=0;
2804                                                 sk->window = new_window;
2805                                         }
2806                                 }
2807                                 skb2->acked = 1;
2808 
2809                                 /*
2810                                  * When we ack the fin, we turn on
2811                                  * the RCV_SHUTDOWN flag.
2812                                  */
2813                                 if (skb2->h.th->fin) {
2814                                         sk->shutdown |= RCV_SHUTDOWN;
2815                                         if (!sk->dead) sk->state_change(sk);
2816                                 }
2817 
2818                                 /* Force an immediate ack. */
2819                                 sk->ack_backlog = sk->max_ack_backlog;
2820                         } else {
2821                                 break;
2822                         }
2823                 }
2824 
2825                 /*
2826                  * This also takes care of updating the window.
2827                  * This if statement needs to be simplified.
2828                  */
2829                 if (!sk->delay_acks ||
2830                     sk->ack_backlog >= sk->max_ack_backlog || 
2831                     sk->bytes_rcv > sk->max_unacked || th->fin) {
2832 /*                      tcp_send_ack(sk->send_seq, sk->acked_seq,sk,th, saddr); */
2833                 } else {
2834                         sk->ack_backlog++;
2835                         if(sk->debug)
2836                                 printk("Ack queued.\n");
2837                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2838                 }
2839         }
2840   }
2841 
2842   /*
2843    * If we've missed a packet, send an ack.
2844    * Also start a timer to send another.
2845    */
2846   if (!skb->acked) {
2847         /*
2848          * This is important.  If we don't have much room left,
2849          * we need to throw out a few packets so we have a good
2850          * window.  Note that mtu is used, not mss, because mss is really
2851          * for the send side.  He could be sending us stuff as large as mtu.
2852          */
2853         while (sk->prot->rspace(sk) < sk->mtu) {
2854                 skb1 = skb_peek(&sk->rqueue);
2855                 if (skb1 == NULL) {
2856                         printk("INET: tcp.c:tcp_data memory leak detected.\n");
2857                         break;
2858                 }
2859 
2860                 /* Don't throw out something that has been acked. */
2861                 if (skb1->acked) {
2862                         break;
2863                 }
2864                 
2865                 skb_unlink(skb1);
2866 #ifdef OLDWAY           
2867                 if (skb1->prev == skb1) {
2868                         sk->rqueue = NULL;
2869                 } else {
2870                         sk->rqueue = (struct sk_buff *)skb1->prev;
2871                         skb1->next->prev = skb1->prev;
2872                         skb1->prev->next = skb1->next;
2873                 }
2874 #endif          
2875                 kfree_skb(skb1, FREE_READ);
2876         }
2877         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
2878         sk->ack_backlog++;
2879         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2880   } else {
2881         /* The segment arrived in sequence and has been accepted; send the ack out now. */
2882         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
2883   }
2884 
2885   /* Now tell the user we may have some data. */
2886   if (!sk->dead) {
2887         if(sk->debug)
2888                 printk("Data wakeup.\n");
2889         sk->data_ready(sk,0);
2890   } else {
2891         DPRINTF((DBG_TCP, "data received on dead socket.\n"));
2892   }
2893 
2894   if (sk->state == TCP_FIN_WAIT2 &&
2895       sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->send_seq) {
2896         DPRINTF((DBG_TCP, "tcp_data: entering last_ack state sk = %X\n", sk));
2897 
2898 /*      tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr); */
2899         sk->shutdown = SHUTDOWN_MASK;
2900         sk->state = TCP_LAST_ACK;
2901         if (!sk->dead) sk->state_change(sk);
2902   }
2903 
2904   return(0);
2905 }
2906 
2907 
2908 static int
2909 tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long saddr)
2910 {
2911   extern int kill_pg(int pg, int sig, int priv);
2912   extern int kill_proc(int pid, int sig, int priv);
2913     
2914   if (!sk->dead) 
2915         sk->data_ready(sk,0);
2916     
2917   if (sk->urginline) {
2918         th->urg = 0;
2919         th->psh = 1;
2920         return(0);
2921   }
2922 
2923   if (!sk->urg) {
2924         /* So if we get more urgent data, we don't signal the user again. */
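              /* sk->proc > 0 names a single process to signal; a negative
               * value names a process group (hence kill_pg(-sk->proc) below).
               */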
2925         if (sk->proc != 0) {
2926                 if (sk->proc > 0) {
2927                         kill_proc(sk->proc, SIGURG, 1);
2928                 } else {
2929                         kill_pg(-sk->proc, SIGURG, 1);
2930                 }
2931         }
2932   }
2933   sk->urg++;
2934   return(0);
2935 }
2936 
2937 
2938 /* This deals with incoming fins. 'Linus at 9 O'clock' 8-) */
2939 static int
2940 tcp_fin(struct sock *sk, struct tcphdr *th, 
2941          unsigned long saddr, struct device *dev)
2942 {
2943   DPRINTF((DBG_TCP, "tcp_fin(sk=%X, th=%X, saddr=%X, dev=%X)\n",
2944                                                 sk, th, saddr, dev));
2945   
2946   if (!sk->dead) {
2947         sk->state_change(sk);
2948   }
2949 
2950   switch(sk->state) {
2951         case TCP_SYN_RECV:
2952         case TCP_SYN_SENT:
2953         case TCP_ESTABLISHED:
2954                 /* Contains the one that needs to be acked */
2955                 sk->fin_seq = th->seq+1;
2956                 sk->state = TCP_CLOSE_WAIT;
2957                 if (th->rst) sk->shutdown = SHUTDOWN_MASK;
2958                 break;
2959 
2960         case TCP_CLOSE_WAIT:
2961         case TCP_FIN_WAIT2:
2962                 break; /* we got a retransmit of the fin. */
2963 
2964         case TCP_FIN_WAIT1:
2965                 /* Contains the one that needs to be acked */
2966                 sk->fin_seq = th->seq+1;
2967                 sk->state = TCP_FIN_WAIT2;
2968                 break;
2969 
2970         default:
2971         case TCP_TIME_WAIT:
2972                 sk->state = TCP_LAST_ACK;
2973 
2974                 /* Start the timers. */
2975                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2976                 return(0);
2977   }
2978   sk->ack_backlog++;
2979 
2980   return(0);
2981 }
2982 
2983 
2984 /* This will accept the next outstanding connection. */
2985 static struct sock *
2986 tcp_accept(struct sock *sk, int flags)
2987 {
2988   struct sock *newsk;
2989   struct sk_buff *skb;
2990   
2991   DPRINTF((DBG_TCP, "tcp_accept(sk=%X, flags=%X, addr=%s)\n",
2992                                 sk, flags, in_ntoa(sk->saddr)));
2993 
2994   /*
2995    * We need to make sure that this socket is listening,
2996    * and that it has something pending.
2997    */
2998   if (sk->state != TCP_LISTEN) {
2999         sk->err = EINVAL;
3000         return(NULL); 
3001   }
3002 
3003   /* avoid the race. */
3004   cli();
3005   sk->inuse = 1;
3006   while((skb = get_firstr(sk)) == NULL) {
3007         if (flags & O_NONBLOCK) {
3008                 sti();
3009                 release_sock(sk);
3010                 sk->err = EAGAIN;
3011                 return(NULL);
3012         }
3013 
3014         release_sock(sk);
3015         interruptible_sleep_on(sk->sleep);
3016         if (current->signal & ~current->blocked) {
3017                 sti();
3018                 sk->err = ERESTARTSYS;
3019                 return(NULL);
3020         }
3021         sk->inuse = 1;
3022   }
3023   sti();
3024 
3025   /* Now all we need to do is return skb->sk. */
3026   newsk = skb->sk;
3027 
3028   kfree_skb(skb, FREE_READ);
3029   sk->ack_backlog--;
3030   release_sock(sk);
3031   return(newsk);
3032 }
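
/*
 * A minimal userspace sketch (not part of the kernel proper) of how a
 * caller is expected to drive the accept path above: retry when the call
 * is interrupted, and treat EAGAIN as "nothing pending" on a non-blocking
 * listening socket.  The name accept_one and the error policy are
 * illustrative assumptions only.
 */
#if 0
#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>

static int accept_one(int listen_fd)
{
	int fd;

	for (;;) {
		fd = accept(listen_fd, NULL, NULL);
		if (fd >= 0)
			return fd;              /* got a connection */
		if (errno == EINTR)
			continue;               /* interrupted; just retry */
		if (errno == EAGAIN || errno == EWOULDBLOCK)
			return -1;              /* non-blocking socket, nothing pending */
		perror("accept");
		return -1;
	}
}
#endif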
3033 
3034 
3035 /* This will initiate an outgoing connection. */
3036 static int
3037 tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
3038 {
3039   struct sk_buff *buff;
3040   struct sockaddr_in sin;
3041   struct device *dev=NULL;
3042   unsigned char *ptr;
3043   int tmp;
3044   struct tcphdr *t1;
3045   int err;
3046 
3047   if (sk->state != TCP_CLOSE) return(-EISCONN);
3048   if (addr_len < 8) return(-EINVAL);
3049 
3050   err=verify_area(VERIFY_READ, usin, addr_len);
3051   if(err)
3052         return err;
3053         
3054   memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len));
3055 
3056   if (sin.sin_family && sin.sin_family != AF_INET) return(-EAFNOSUPPORT);
3057 
3058   DPRINTF((DBG_TCP, "TCP connect daddr=%s\n", in_ntoa(sin.sin_addr.s_addr)));
3059   
3060   /* Don't want a TCP connection going to a broadcast address */
3061   if (chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) { 
3062         DPRINTF((DBG_TCP, "TCP connection to broadcast address not allowed\n"));
3063         return(-ENETUNREACH);
3064   }
3065   
3066   /* Connect back to the same socket: Blows up so disallow it */
3067   if(sk->saddr == sin.sin_addr.s_addr && sk->num==ntohs(sin.sin_port))
3068         return -EBUSY;
3069 
3070   sk->inuse = 1;
3071   sk->daddr = sin.sin_addr.s_addr;
3072   sk->send_seq = jiffies * SEQ_TICK - seq_offset;
3073   sk->window_seq = sk->send_seq;
3074   sk->rcv_ack_seq = sk->send_seq -1;
3075   sk->err = 0;
3076   sk->dummy_th.dest = sin.sin_port;
3077   release_sock(sk);
3078 
3079   buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3080   if (buff == NULL) {
3081         return(-ENOMEM);
3082   }
3083   sk->inuse = 1;
3084   buff->mem_addr = buff;
3085   buff->mem_len = MAX_SYN_SIZE;
3086   buff->len = 24;
3087   buff->sk = sk;
3088   buff->free = 1;
3089   t1 = (struct tcphdr *) buff->data;
3090 
3091   /* Put in the IP header and routing stuff. */
3092   /* We need to build the routing stuff from the things saved in skb. */
3093   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3094                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3095   if (tmp < 0) {
3096         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3097         release_sock(sk);
3098         return(-ENETUNREACH);
3099   }
3100   buff->len += tmp;
3101   t1 = (struct tcphdr *)((char *)t1 +tmp);
3102 
3103   memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3104   t1->seq = ntohl(sk->send_seq++);
3105   buff->h.seq = sk->send_seq;
3106   t1->ack = 0;
3107   t1->window = 2;
3108   t1->res1=0;
3109   t1->res2=0;
3110   t1->rst = 0;
3111   t1->urg = 0;
3112   t1->psh = 0;
3113   t1->syn = 1;
3114   t1->urg_ptr = 0;
3115   t1->doff = 6;
3116 
3117 /* Use whatever MSS the user asked for, or pick a sensible default below. */
3118   if (sk->user_mss)
3119     sk->mtu = sk->user_mss;
3120   else {
3121 #ifdef SUBNETSARELOCAL
3122     if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3123 #else
3124     if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3125 #endif
3126       sk->mtu = 576 - HEADER_SIZE;
3127     else
3128       sk->mtu = MAX_WINDOW;
3129   }
3130 /* but not bigger than device MTU */
3131   sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3132 
3133   /* Put in the TCP MSS option advertising sk->mtu (a standalone sketch of the encoding follows this function). */
3134   ptr = (unsigned char *)(t1+1);
3135   ptr[0] = 2;
3136   ptr[1] = 4;
3137   ptr[2] = (sk->mtu) >> 8;
3138   ptr[3] = (sk->mtu) & 0xff;
3139   tcp_send_check(t1, sk->saddr, sk->daddr,
3140                   sizeof(struct tcphdr) + 4, sk);
3141 
3142   /* This must go first, otherwise a really quick response will get reset. */
3143   sk->state = TCP_SYN_SENT;
3144   sk->rtt = TCP_CONNECT_TIME;
3145   reset_timer(sk, TIME_WRITE, TCP_CONNECT_TIME);        /* Timer for repeating the SYN until an answer */
3146   sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3147 
3148   sk->prot->queue_xmit(sk, dev, buff, 0);  
3149   
3150   release_sock(sk);
3151   return(0);
3152 }
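
/*
 * A minimal standalone sketch (assumed names, not part of this file) of
 * the MSS option encoding used in tcp_connect() above: kind 2, length 4,
 * followed by the 16-bit segment size in network byte order.
 */
#if 0
#include <stdint.h>

static void tcp_put_mss_option(unsigned char *ptr, uint16_t mss)
{
	ptr[0] = 2;                     /* option kind: maximum segment size */
	ptr[1] = 4;                     /* option length, including kind and length bytes */
	ptr[2] = (mss >> 8) & 0xff;     /* high byte first (network byte order) */
	ptr[3] = mss & 0xff;            /* low byte */
}
#endif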
3153 
3154 
3155 /* This function checks whether the tcp header is actually acceptable (a sketch of the sequence-number arithmetic follows this function). */
3156 static int
3157 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
3158              struct options *opt, unsigned long saddr, struct device *dev)
3159 {
3160   /*
3161    * This isn't quite right.  sk->acked_seq could be more recent
3162    * than sk->window.  This is however close enough.  We will accept
3163    * slightly more packets than we should, but it should not cause
3164    * problems unless someone is trying to forge packets.
3165    */
3166   DPRINTF((DBG_TCP, "tcp_sequence(sk=%X, th=%X, len = %d, opt=%d, saddr=%X)\n",
3167           sk, th, len, opt, saddr));
3168 
3169   if (between(th->seq, sk->acked_seq, sk->acked_seq + sk->window)||
3170       between(th->seq + len-(th->doff*4), sk->acked_seq + 1,
3171               sk->acked_seq + sk->window) ||
3172      (before(th->seq, sk->acked_seq) &&
3173        after(th->seq + len -(th->doff*4), sk->acked_seq + sk->window))) {
3174        return(1);
3175    }
3176   DPRINTF((DBG_TCP, "tcp_sequence: rejecting packet.\n"));
3177 
3178   /*
3179    *    Send a reset if we get something not ours and we are
3180    *    unsynchronized. Note: We don't do anything to our end. We
3181    *    are just killing the bogus remote connection then we will
3182    *    connect again and it will work (with luck).
3183    */
3184          
3185   if(sk->state==TCP_SYN_SENT||sk->state==TCP_SYN_RECV)
3186   {
3187         tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3188         return(1);
3189   }
3190 
3191   /*
3192    * If it's too far ahead, send an ack to let the
3193    * other end know what we expect.
3194    */
3195   if (after(th->seq, sk->acked_seq + sk->window)) {
3196         if(!th->rst)
3197                 tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
3198         return(0);
3199   }
3200 
3201 #ifdef undef
3202 /*
3203  * If we do this, we won't respond to keepalive packets, since those
3204  * are slightly out of window and we have to generate an ack for them;
3205  * a late ack should still not have a sequence number less than one
3206  * we've seen before.  Berkeley doesn't seem to do this, but it's
3207  * always hard to be sure.
3208  */
3209   /* In case it's just a late ack, let it through. */
3210   if (th->ack && len == (th->doff * 4) &&
3211       after(th->seq, sk->acked_seq - 32767) &&
3212       !th->fin && !th->syn) return(1);
3213 #endif
3214 
3215   if (!th->rst) {
3216         /* Try to resync things. */
3217         tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
3218   }
3219   return(0);
3220 }
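
/*
 * A minimal standalone sketch (assumed helper names, not the kernel's own
 * macros) of the wrap-safe sequence-number comparison in the spirit of the
 * before()/after()/between() tests used by tcp_sequence() above: two 32-bit
 * sequence numbers are compared by looking at the sign of their difference,
 * relying on the usual two's-complement wrap.  The real macros live in the
 * tcp header and may differ in detail.
 */
#if 0
#include <stdint.h>

static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;    /* a is earlier than b, modulo 2^32 */
}

static int seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;    /* a is later than b, modulo 2^32 */
}

static int seq_between(uint32_t seq, uint32_t low, uint32_t high)
{
	/* low <= seq <= high in wrap-around arithmetic */
	return !seq_before(seq, low) && !seq_after(seq, high);
}
#endif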
3221 
3222 
3223 
3224 
3225 
3226 int
3227 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
3228         unsigned long daddr, unsigned short len,
3229         unsigned long saddr, int redo, struct inet_protocol * protocol)
3230 {
3231   struct tcphdr *th;
3232   struct sock *sk;
3233 
3234   if (!skb) {
3235         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL\n"));
3236         return(0);
3237   }
3238 #if 0   /* FIXME: it's ok for protocol to be NULL */
3239   if (!protocol) {
3240         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv protocol = NULL\n"));
3241         return(0);
3242   }
3243 
3244   if (!opt) {   /* FIXME: it's ok for opt to be NULL */
3245         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv opt = NULL\n"));
3246   }
3247 #endif
3248   if (!dev) {
3249         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL\n"));
3250         return(0);
3251   }
3252   th = skb->h.th;
3253 
3254   /* Find the socket. */
3255   sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3256   DPRINTF((DBG_TCP, "<<\n"));
3257   DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X\n", len, redo, skb));
3258   
3259   /* If this socket has received a reset it is, to all intents and
3260      purposes, really dead. */
3261   if (sk!=NULL && sk->zapped)
3262         sk=NULL;
3263 
3264   if (sk) {
3265          DPRINTF((DBG_TCP, "sk = %X:\n", sk));
3266   }
3267 
3268   if (!redo) {
3269         if (tcp_check(th, len, saddr, daddr )) {
3270                 skb->sk = NULL;
3271                 DPRINTF((DBG_TCP, "packet dropped with bad checksum.\n"));
3272 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: bad checksum\n");
3273                 kfree_skb(skb,FREE_READ);
3274                 /*
3275                  * We don't release the socket because it was
3276                  * never marked in use.
3277                  */
3278                 return(0);
3279         }
3280 
3281         /* See if we know about the socket. */
3282         if (sk == NULL) {
3283                 if (!th->rst) 
3284                 {       
3285                         th->seq = ntohl(th->seq);
3286                         /* So reset is always called with th->seq in host order */
3287                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3288                 }
3289                 skb->sk = NULL;
3290                 kfree_skb(skb, FREE_READ);
3291                 return(0);
3292         }
3293 
3294         skb->len = len;
3295         skb->sk = sk;
3296         skb->acked = 0;
3297         skb->used = 0;
3298         skb->free = 0;
3299         skb->urg_used = 0;
3300         skb->saddr = daddr;
3301         skb->daddr = saddr;
3302 
3303         th->seq = ntohl(th->seq);
3304 
3305        /* We may need to add it to the backlog here. */
3306        cli();
3307        if (sk->inuse) {
3308                 if (sk->back_log == NULL) {
3309                         sk->back_log = skb;
3310                         skb->next = skb;
3311                         skb->prev = skb;
3312                 } else {
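                             /* back_log is kept as a circular list; link the new
                              * skb in just before the head, i.e. at the tail.
                              */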
3313                         skb->next = sk->back_log;
3314                         skb->prev = sk->back_log->prev;
3315                         skb->prev->next = skb;
3316                         skb->next->prev = skb;
3317                 }
3318                 sti();
3319                 return(0);
3320         }
3321         sk->inuse = 1;
3322         sti();
3323   } else {
3324         if (!sk) {
3325                 DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1\n"));
3326                 return(0);
3327         }
3328   }
3329 
3330   if (!sk->prot) {
3331         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL \n"));
3332         return(0);
3333   }
3334 
3335   /* Charge the memory to the socket. */
3336   if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
3337         skb->sk = NULL;
3338         DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.\n"));
3339         kfree_skb(skb, FREE_READ);
3340         release_sock(sk);
3341         return(0);
3342   }
3343   sk->rmem_alloc += skb->mem_len;
3344 
3345   DPRINTF((DBG_TCP, "About to do switch.\n"));
3346 
3347   /* Now deal with it. */
3348   switch(sk->state) {
3349         /*
3350          * This should close the system down if it's waiting
3351          * for an ack that is never going to be sent.
3352          */
3353         case TCP_LAST_ACK:
3354                 if (th->rst) {
3355                         sk->zapped=1;
3356                         sk->err = ECONNRESET;
3357                         sk->state = TCP_CLOSE;
3358                         sk->shutdown = SHUTDOWN_MASK;
3359                         if (!sk->dead) {
3360                                 sk->state_change(sk);
3361                         }
3362                         kfree_skb(skb, FREE_READ);
3363                         release_sock(sk);
3364                         return(0);
3365                 }
3366 
3367         case TCP_ESTABLISHED:
3368         case TCP_CLOSE_WAIT:
3369         case TCP_FIN_WAIT1:
3370         case TCP_FIN_WAIT2:
3371         case TCP_TIME_WAIT:
3372                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3373 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: not in seq\n");
3374 #ifdef undef
3375 /* nice idea, but tcp_sequence already does this.  Maybe it shouldn't?? */
3376                         if(!th->rst)
3377                                 tcp_send_ack(sk->send_seq, sk->acked_seq, 
3378                                      sk, th, saddr);
3379 #endif
3380                         kfree_skb(skb, FREE_READ);
3381                         release_sock(sk);
3382                         return(0);
3383                 }
3384 
3385                 if (th->rst) {
3386                         sk->zapped=1;
3387                         /* This means the thing should really be closed. */
3388                         sk->err = ECONNRESET;
3389 
3390                         if (sk->state == TCP_CLOSE_WAIT) {
3391                                 sk->err = EPIPE;
3392                         }
3393 
3394                         /*
3395                          * A reset with a fin just means that
3396                          * the data was not all read.
3397                          */
3398                         sk->state = TCP_CLOSE;
3399                         sk->shutdown = SHUTDOWN_MASK;
3400                         if (!sk->dead) {
3401                                 sk->state_change(sk);
3402                         }
3403                         kfree_skb(skb, FREE_READ);
3404                         release_sock(sk);
3405                         return(0);
3406                 }
3407 #if 0
3408                 if ((opt && (opt->security != 0 ||
3409                             opt->compartment != 0)) || th->syn) {
3410 #else
3411                 if (th->syn) {
3412 #endif
3413                         sk->err = ECONNRESET;
3414                         sk->state = TCP_CLOSE;
3415                         sk->shutdown = SHUTDOWN_MASK;
3416                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3417                         if (!sk->dead) {
3418                                 sk->state_change(sk);
3419                         }
3420                         kfree_skb(skb, FREE_READ);
3421                         release_sock(sk);
3422                         return(0);
3423                 }
3424                 if (th->ack) {
3425                         if (!tcp_ack(sk, th, saddr, len)) {
3426                                 kfree_skb(skb, FREE_READ);
3427                                 release_sock(sk);
3428                                 return(0);
3429                         }
3430                 }
3431                 if (th->urg) {
3432                         if (tcp_urg(sk, th, saddr)) {
3433                                 kfree_skb(skb, FREE_READ);
3434                                 release_sock(sk);
3435                                 return(0);
3436                         }
3437                 }
3438 
3439                 if (tcp_data(skb, sk, saddr, len)) {
3440                         kfree_skb(skb, FREE_READ);
3441                         release_sock(sk);
3442                         return(0);
3443                 }
3444 
3445                 /* Moved: you must do data then fin bit */
3446                 if (th->fin && tcp_fin(sk, th, saddr, dev)) {
3447                         kfree_skb(skb, FREE_READ);
3448                         release_sock(sk);
3449                         return(0);
3450                 }
3451 
3452                 release_sock(sk);
3453                 return(0);
3454 
3455         case TCP_CLOSE:
3456                 if (sk->dead || sk->daddr) {
3457                         DPRINTF((DBG_TCP, "packet received for closed,dead socket\n"));
3458                         kfree_skb(skb, FREE_READ);
3459                         release_sock(sk);
3460                         return(0);
3461                 }
3462 
3463                 if (!th->rst) {
3464                         if (!th->ack)
3465                                 th->ack_seq = 0;
3466                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3467                 }
3468                 kfree_skb(skb, FREE_READ);
3469                 release_sock(sk);
3470                 return(0);
3471 
3472         case TCP_LISTEN:
3473                 if (th->rst) {
3474                         kfree_skb(skb, FREE_READ);
3475                         release_sock(sk);
3476                         return(0);
3477                 }
3478                 if (th->ack) {
3479                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3480                         kfree_skb(skb, FREE_READ);
3481                         release_sock(sk);
3482                         return(0);
3483                 }
3484 
3485                 if (th->syn) {
3486 #if 0
3487                         if (opt->security != 0 || opt->compartment != 0) {
3488                                 tcp_reset(daddr, saddr, th, prot, opt,dev);
3489                                 release_sock(sk);
3490                                 return(0);
3491                         }
3492 #endif
3493 
3494                         /*
3495                          * Now we just put the whole thing including
3496                          * the header and saddr, and protocol pointer
3497                          * into the buffer.  We can't respond until the
3498                          * user tells us to accept the connection.
3499                          */
3500                         tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
3501                         release_sock(sk);
3502                         return(0);
3503                 }
3504 
3505                 kfree_skb(skb, FREE_READ);
3506                 release_sock(sk);
3507                 return(0);
3508 
3509         default:
3510                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3511                         kfree_skb(skb, FREE_READ);
3512                         release_sock(sk);
3513                         return(0);
3514                 }
3515 
3516         case TCP_SYN_SENT:
3517                 if (th->rst) {
3518                         sk->err = ECONNREFUSED;
3519                         sk->state = TCP_CLOSE;
3520                         sk->shutdown = SHUTDOWN_MASK;
3521                         sk->zapped = 1;
3522                         if (!sk->dead) {
3523                                 sk->state_change(sk);
3524                         }
3525                         kfree_skb(skb, FREE_READ);
3526                         release_sock(sk);
3527                         return(0);
3528                 }
3529 #if 0
3530                 if (opt->security != 0 || opt->compartment != 0) {
3531                         sk->err = ECONNRESET;
3532                         sk->state = TCP_CLOSE;
3533                         sk->shutdown = SHUTDOWN_MASK;
3534                         tcp_reset(daddr, saddr,  th, sk->prot, opt, dev);
3535                         if (!sk->dead) {
3536                                 wake_up_interruptible(sk->sleep);
3537                         }
3538                         kfree_skb(skb, FREE_READ);
3539                         release_sock(sk);
3540                         return(0);
3541                 }
3542 #endif
3543                 if (!th->ack) {
3544                         if (th->syn) {
3545                                 sk->state = TCP_SYN_RECV;
3546                         }
3547 
3548                         kfree_skb(skb, FREE_READ);
3549                         release_sock(sk);
3550                         return(0);
3551                 }
3552 
3553                 switch(sk->state) {
3554                         case TCP_SYN_SENT:
3555                                 if (!tcp_ack(sk, th, saddr, len)) {
3556                                         tcp_reset(daddr, saddr, th,
3557                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3558                                         kfree_skb(skb, FREE_READ);
3559                                         release_sock(sk);
3560                                         return(0);
3561                                 }
3562 
3563                                 /*
3564                                  * If the syn bit is also set, switch to
3565                                  * tcp_syn_recv, and then to established.
3566                                  */
3567                                 if (!th->syn) {
3568                                         kfree_skb(skb, FREE_READ);
3569                                         release_sock(sk);
3570                                         return(0);
3571                                 }
3572 
3573                                 /* Ack the syn and fall through. */
3574                                 sk->acked_seq = th->seq+1;
3575                                 sk->fin_seq = th->seq;
3576                                 tcp_send_ack(sk->send_seq, th->seq+1,
3577                                                         sk, th, sk->daddr);
3578         
3579                         case TCP_SYN_RECV:
3580                                 if (!tcp_ack(sk, th, saddr, len)) {
3581                                         tcp_reset(daddr, saddr, th,
3582                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
3583                                         kfree_skb(skb, FREE_READ);
3584                                         release_sock(sk);
3585                                         return(0);
3586                                 }
3587                                 sk->state = TCP_ESTABLISHED;
3588 
3589                                 /*
3590                                  * Now we need to finish filling out
3591                                  * some of the tcp header.
3592                                  */
3593                                 /* We need to check for mtu info. */
3594                                 tcp_options(sk, th);
3595                                 sk->dummy_th.dest = th->source;
3596                                 sk->copied_seq = sk->acked_seq-1;
3597                                 if (!sk->dead) {
3598                                         sk->state_change(sk);
3599                                 }
3600 
3601                                 /*
3602                                  * We've already processed his first
3603                                  * ack.  In just about all cases that
3604                                  * will have set max_window.  This is
3605                                  * to protect us against the possibility
3606                                  * that the initial window he sent was 0.
3607                                  * This must occur after tcp_options, which
3608                                  * sets sk->mtu.
3609                                  */
3610                                 if (sk->max_window == 0) {
3611                                   sk->max_window = 32;
3612                                   sk->mss = min(sk->max_window, sk->mtu);
3613                                 }
3614 
3615                                 /*
3616                                  * Now process the rest like we were
3617                                  * already in the established state.
3618                                  */
3619                                 if (th->urg) {
3620                                         if (tcp_urg(sk, th, saddr)) { 
3621                                                 kfree_skb(skb, FREE_READ);
3622                                                 release_sock(sk);
3623                                                 return(0);
3624                                         }
3625                                 }
3626                                 if (tcp_data(skb, sk, saddr, len))
3627                                         kfree_skb(skb, FREE_READ);
3628 
3629                                 if (th->fin) tcp_fin(sk, th, saddr, dev);
3630                                 release_sock(sk);
3631                                 return(0);
3632                 }
3633 
3634                 if (th->urg) {
3635                         if (tcp_urg(sk, th, saddr)) {
3636                                 kfree_skb(skb, FREE_READ);
3637                                 release_sock(sk);
3638                                 return(0);
3639                         }
3640                 }
3641 
3642                 if (tcp_data(skb, sk, saddr, len)) {
3643                         kfree_skb(skb, FREE_READ);
3644                         release_sock(sk);
3645                         return(0);
3646                 }
3647 
3648                 if (!th->fin) {
3649                         release_sock(sk);
3650                         return(0);
3651                 }
3652                 tcp_fin(sk, th, saddr, dev);
3653                 release_sock(sk);
3654                 return(0);
3655         }
3656 }
3657 
3658 
3659 /*
3660  * This routine sends a packet with an out of date sequence
3661  * number. It assumes the other end will try to ack it.
3662  */
3663 static void
3664 tcp_write_wakeup(struct sock *sk)
3665 {
3666   struct sk_buff *buff;
3667   struct tcphdr *t1;
3668   struct device *dev=NULL;
3669   int tmp;
3670 
3671   if (sk->zapped)
3672         return; /* After a valid reset we can send no more */
3673 
3674   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) return;
3675 
3676   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
3677   if (buff == NULL) return;
3678 
3679   buff->mem_addr = buff;
3680   buff->mem_len = MAX_ACK_SIZE;
3681   buff->len = sizeof(struct tcphdr);
3682   buff->free = 1;
3683   buff->sk = sk;
3684   DPRINTF((DBG_TCP, "in tcp_write_wakeup\n"));
3685   t1 = (struct tcphdr *) buff->data;
3686 
3687   /* Put in the IP header and routing stuff. */
3688   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3689                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
3690   if (tmp < 0) {
3691         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3692         return;
3693   }
3694 
3695   buff->len += tmp;
3696   t1 = (struct tcphdr *)((char *)t1 +tmp);
3697 
3698   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
3699 
3700   /*
3701    * Use a previous sequence.
3702    * This should cause the other end to send an ack.
3703    */
3704   t1->seq = ntohl(sk->send_seq-1);
3705   t1->ack = 1; 
3706   t1->res1= 0;
3707   t1->res2= 0;
3708   t1->rst = 0;
3709   t1->urg = 0;
3710   t1->psh = 0;
3711   t1->fin = 0;
3712   t1->syn = 0;
3713   t1->ack_seq = ntohl(sk->acked_seq);
3714   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3715   t1->doff = sizeof(*t1)/4;
3716   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
3717 
3718   /* Send it and free it.
3719    * This will prevent the timer from automatically being restarted.
3720    */
3721   sk->prot->queue_xmit(sk, dev, buff, 1);
3722 }
3723 
3724 /*
3725  * This routine probes a zero window.  It makes a copy of the first
3726  * packet in the write queue, but with just one byte of data.
3727  */
3728 void
3729 tcp_send_probe0(struct sock *sk)
3730 {
3731   unsigned char *raw;
3732   struct iphdr *iph;
3733   struct sk_buff *skb2, *skb;
3734   int len, hlen, data;
3735   struct tcphdr *t1;
3736   struct device *dev;
3737 
3738   if (sk->zapped)
3739         return; /* Afer a valid reset we can send no more */
3740 
3741   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT &&
3742       sk -> state != TCP_FIN_WAIT1 && sk->state != TCP_FIN_WAIT2)
3743         return;
3744 
3745   skb = sk->wfront;
3746   if (skb == NULL)
3747         return;
3748 
3749   dev = skb->dev;
3750   /* I know this can't happen, but it does.. */
3751   if(dev==NULL)
3752     {
3753       printk("tcp_send_probe0: NULL device bug!\n");
3754       return;
3755     }
3756   IS_SKB(skb);
3757 
3758   raw = skb->data;
3759   iph = (struct iphdr *) (raw + dev->hard_header_len);
3760 
3761   hlen = (iph->ihl * sizeof(unsigned long)) + dev->hard_header_len;
3762   data = skb->len - hlen - sizeof(struct tcphdr);
3763   len = hlen + sizeof(struct tcphdr) + (data ? 1 : 0);
3764         
3765   /* Allocate buffer. */
3766   if ((skb2 = alloc_skb(sizeof(struct sk_buff) + len, GFP_ATOMIC)) == NULL) {
3767 /*    printk("alloc failed raw %x th %x hlen %d data %d len %d\n",
3768            raw, skb->h.th, hlen, data, len); */
3769     reset_timer (sk, TIME_PROBE0, 10);  /* try again real soon */
3770     return;
3771   }
3772 
3773   skb2->arp = skb->arp;
3774   skb2->len = len;
3775   skb2->h.raw = (char *)(skb2->data);
3776  
3777   sk->wmem_alloc += skb2->mem_len;
3778  
3779   /* Copy the packet header into the new buffer. */
3780   memcpy(skb2->h.raw, raw, len);
3781  
3782   skb2->h.raw += hlen;  /* it's now h.th -- pointer to the tcp header */
3783   t1 = skb2->h.th;
3784  
3785 /* source, dest, seq, from existing packet */
3786   t1->ack_seq = ntohl(sk->acked_seq);
3787   t1->res1 = 0;
3788 /* doff and fin come from the existing packet.  Fin is safe because Linux
3789  * always sends fin in a separate packet; syn and rst had better be zero
3790  * in the original. */
3791   t1->ack = 1;
3792   t1->urg = 0;  /* urgent pointer might be beyond this fragment */
3793   t1->res2 = 0;
3794   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3795   t1->urg_ptr = 0;
3796   tcp_send_check(t1, sk->saddr, sk->daddr, len - hlen, sk);
3797   /* Send it and free it.
3798    * This will prevent the timer from automatically being restarted.
3799    */
3800   sk->prot->queue_xmit(sk, dev, skb2, 1);
3801   sk->backoff++;
3802   /*
3803    * In the case of retransmissions, there's good reason to limit
3804    * rto to 120 sec, as that's the maximum legal RTT on the Internet.
3805    * For probes it could reasonably be longer.  However, making it
3806    * much longer could cause unacceptable delays in some situations,
3807    * so we might as well use the same value.
3808    */
3809   sk->rto = min(sk->rto << 1, 120*HZ);
3810   reset_timer (sk, TIME_PROBE0, sk->rto);
3811   sk->retransmits++;
3812   sk->prot->retransmits ++;
3813 }
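
/*
 * A minimal standalone sketch (assumed names, not part of this file) of
 * the probe back-off used above: the retransmission timeout doubles on
 * every zero-window probe and is capped at 120 seconds.
 */
#if 0
static long next_probe_timeout(long rto, long hz)
{
	long cap = 120 * hz;            /* 120 seconds, expressed in ticks */

	rto <<= 1;                      /* exponential back-off */
	return rto < cap ? rto : cap;   /* never exceed the cap */
}
#endif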
3814 
3815 /*
3816  *      Socket option code for TCP. 
3817  */
3818   
3819 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
3820 {
3821         int val,err;
3822 
3823         if(level!=SOL_TCP)
3824                 return ip_setsockopt(sk,level,optname,optval,optlen);
3825 
3826         if (optval == NULL) 
3827                 return(-EINVAL);
3828 
3829         err=verify_area(VERIFY_READ, optval, sizeof(int));
3830         if(err)
3831                 return err;
3832         
3833         val = get_fs_long((unsigned long *)optval);
3834 
3835         switch(optname)
3836         {
3837                 case TCP_MAXSEG:
3838 /*                      if(val<200||val>2048 || val>sk->mtu) */
3839 /*
3840  * Values greater than the interface MTU won't take effect.  However, at
3841  * the point when this call is made we typically don't yet know
3842  * which interface is going to be used.
3843  */
3844                         if(val<1||val>MAX_WINDOW)
3845                                 return -EINVAL;
3846                         sk->user_mss=val;
3847                         return 0;
3848                 case TCP_NODELAY:
3849                         sk->nonagle=(val==0)?0:1;
3850                         return 0;
3851                 default:
3852                         return(-ENOPROTOOPT);
3853         }
3854 }
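
/*
 * A minimal userspace sketch (not part of the kernel proper) of how these
 * options are set from an application: TCP_NODELAY disables the Nagle
 * algorithm and TCP_MAXSEG caps the MSS we advertise.  The descriptor fd
 * and the chosen values are illustrative assumptions.
 */
#if 0
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static int tune_tcp_socket(int fd)
{
	int one = 1;
	int mss = 536;

	if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) < 0)
		return -1;
	if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) < 0)
		return -1;
	return 0;
}
#endif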
3855 
3856 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
3857 {
3858         int val,err;
3859 
3860         if(level!=SOL_TCP)
3861                 return ip_getsockopt(sk,level,optname,optval,optlen);
3862                         
3863         switch(optname)
3864         {
3865                 case TCP_MAXSEG:
3866                         val=sk->user_mss;
3867                         break;
3868                 case TCP_NODELAY:
3869                         val=sk->nonagle;        /* Until Johannes stuff is in */
3870                         break;
3871                 default:
3872                         return(-ENOPROTOOPT);
3873         }
3874         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3875         if(err)
3876                 return err;
3877         put_fs_long(sizeof(int),(unsigned long *) optlen);
3878 
3879         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3880         if(err)
3881                 return err;
3882         put_fs_long(val,(unsigned long *)optval);
3883 
3884         return(0);
3885 }       
3886 
3887 
3888 struct proto tcp_prot = {
3889   sock_wmalloc,                 /* wmalloc */
3890   sock_rmalloc,                 /* rmalloc */
3891   sock_wfree,                   /* wfree */
3892   sock_rfree,                   /* rfree */
3893   sock_rspace,                  /* rspace */
3894   sock_wspace,                  /* wspace */
3895   tcp_close,                    /* close */
3896   tcp_read,                     /* read */
3897   tcp_write,                    /* write */
3898   tcp_sendto,                   /* sendto */
3899   tcp_recvfrom,                 /* recvfrom */
3900   ip_build_header,              /* build_header */
3901   tcp_connect,                  /* connect */
3902   tcp_accept,                   /* accept */
3903   ip_queue_xmit,                /* queue_xmit */
3904   tcp_retransmit,               /* retransmit */
3905   tcp_write_wakeup,             /* write_wakeup */
3906   tcp_read_wakeup,              /* read_wakeup */
3907   tcp_rcv,                      /* rcv */
3908   tcp_select,                   /* select */
3909   tcp_ioctl,                    /* ioctl */
3910   NULL,                         /* init */
3911   tcp_shutdown,                 /* shutdown */
3912   tcp_setsockopt,               /* setsockopt */
3913   tcp_getsockopt,               /* getsockopt */
3914   128,                          /* max_header */
3915   0,                            /* retransmits */
3916   {NULL,},                      /* sock_array */
3917   "TCP"                         /* name */
3918 };
