root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. __print_th
  3. print_th
  4. tcp_select_window
  5. tcp_time_wait
  6. tcp_retransmit
  7. tcp_err
  8. tcp_readable
  9. tcp_select
  10. tcp_ioctl
  11. tcp_check
  12. tcp_send_check
  13. tcp_send_skb
  14. tcp_dequeue_partial
  15. tcp_send_partial
  16. tcp_enqueue_partial
  17. tcp_send_ack
  18. tcp_build_header
  19. tcp_write
  20. tcp_sendto
  21. tcp_read_wakeup
  22. cleanup_rbuf
  23. tcp_read_urg
  24. tcp_read
  25. tcp_shutdown
  26. tcp_recvfrom
  27. tcp_reset
  28. tcp_options
  29. default_mask
  30. tcp_conn_request
  31. tcp_close
  32. tcp_write_xmit
  33. sort_send
  34. tcp_ack
  35. tcp_data
  36. tcp_check_urg
  37. tcp_urg
  38. tcp_fin
  39. tcp_accept
  40. tcp_connect
  41. tcp_sequence
  42. tcp_rcv
  43. tcp_write_wakeup
  44. tcp_send_probe0
  45. tcp_setsockopt
  46. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *
  19  * Fixes:       
  20  *              Alan Cox        :       Numerous verify_area() calls
  21  *              Alan Cox        :       Set the ACK bit on a reset
  22  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  23  *                                      and was trying to connect (tcp_err()).
  24  *              Alan Cox        :       All icmp error handling was broken
  25  *                                      pointers passed were wrong and the
  26  *                                      socket was looked up backwards. Nobody
  27  *                                      tested any icmp error code obviously.
  28  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  29  *                                      on errors. select behaves and the icmp error race
  30  *                                      has gone by moving it into sock.c
  31  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  32  *                                      packets for unknown sockets.
  33  *              Alan Cox        :       tcp option processing.
  34  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  35  *              Herp Rosmanith  :       More reset fixes
  36  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  37  *                                      any kind of RST is right out.
  38  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  39  *                                      otherwise odd bits of prattle escape still
  40  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  41  *                                      LAN workplace lockups.
  42  *              Alan Cox        :       Some tidyups using the new skb list facilities
  43  *              Alan Cox        :       sk->keepopen now seems to work
  44  *              Alan Cox        :       Pulls options out correctly on accepts
  45  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  46  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  47  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  48  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  49  *              Alan Cox        :       Removed incorrect check for 20 * psh
  50  *      Michael O'Reilly        :       ack < copied bug fix.
  51  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  52  *              Alan Cox        :       FIN with no memory -> CRASH
  53  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  54  *              Alan Cox        :       Added TCP options (SOL_TCP)
  55  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  56  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  57  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  58  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  59  *              Alan Cox        :       Put in missing check for SYN bit.
  60  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  61  *                                      window non shrink trick.
  62  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  63  *              Charles Hedrick :       TCP fixes
  64  *              Toomas Tamm     :       TCP window fixes
  65  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  66  *              Charles Hedrick :       Rewrote most of it to actually work
  67  *              Linus           :       Rewrote tcp_read() and URG handling
  68  *                                      completely
  69  *              Gerhard Koerting:       Fixed some missing timer handling
  70  *
  71  *
  72  * To Fix:
  73  *                      Possibly a problem with accept(). BSD accept never fails after
  74  *              it causes a select. Linux can - given the official select semantics I
  75  *              feel that _really_ its the BSD network programs that are bust (notably
  76  *              inetd, which hangs occasionally because of this).
  77  *                      Protocol closedown badly messed up.
  78  *
  79  *              This program is free software; you can redistribute it and/or
  80  *              modify it under the terms of the GNU General Public License
  81  *              as published by the Free Software Foundation; either version
  82  *              2 of the License, or(at your option) any later version.
  83  */
  84 #include <linux/types.h>
  85 #include <linux/sched.h>
  86 #include <linux/mm.h>
  87 #include <linux/string.h>
  88 #include <linux/socket.h>
  89 #include <linux/sockios.h>
  90 #include <linux/termios.h>
  91 #include <linux/in.h>
  92 #include <linux/fcntl.h>
  93 #include <linux/inet.h>
  94 #include <linux/netdevice.h>
  95 #include "snmp.h"
  96 #include "ip.h"
  97 #include "protocol.h"
  98 #include "icmp.h"
  99 #include "tcp.h"
 100 #include <linux/skbuff.h>
 101 #include "sock.h"
 102 #include <linux/errno.h>
 103 #include <linux/timer.h>
 104 #include <asm/system.h>
 105 #include <asm/segment.h>
 106 #include <linux/mm.h>
 107 
 /*
  * File-scope configuration and state.
  *
  * SEQ_TICK        - NOTE(review): not referenced in the part of the file
  *                   reviewed here; presumably a sequence-number clock
  *                   granularity -- confirm against its users.
  * seq_offset      - NOTE(review): not referenced in the part of the file
  *                   reviewed here; presumably an offset applied to initial
  *                   sequence numbers -- confirm.
  * tcp_statistics  - TCP counters; this file increments fields such as
  *                   TcpOutSegs and TcpAttemptFails.
  * SUBNETSARELOCAL - defined with no value; presumably tested with #ifdef
  *                   further down the file -- confirm.
  */
 108 #define SEQ_TICK 3
 109 unsigned long seq_offset;
 110 struct tcp_mib  tcp_statistics;
 111 
 112 #define SUBNETSARELOCAL
 113 
 /*
  * Return the smaller of two unsigned values.
  *
  * The arguments are compared as unsigned ints; the winner is then
  * converted to the int return type.
  */
 static __inline__ int
 min(unsigned int a, unsigned int b)
 {
         return((a < b) ? a : b);
 }
 120 
 121 
 122 static void __print_th(struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
 123 {
 124         unsigned char *ptr;
 125 
 126         printk("TCP header:\n");
 127         printk("    source=%d, dest=%d, seq =%ld, ack_seq = %ld\n",
 128                 ntohs(th->source), ntohs(th->dest),
 129                 ntohl(th->seq), ntohl(th->ack_seq));
 130         printk("    fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n",
 131                 th->fin, th->syn, th->rst, th->psh, th->ack,
 132                 th->urg, th->res1, th->res2);
 133         printk("    window = %d, check = %d urg_ptr = %d\n",
 134                 ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr));
 135         printk("    doff = %d\n", th->doff);
 136         ptr =(unsigned char *)(th + 1);
 137         printk("    options = %d %d %d %d\n", ptr[0], ptr[1], ptr[2], ptr[3]);
 138 }
 139 
 140 static inline void print_th(struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
 141 {
 142         if (inet_debug == DBG_TCP)
 143                 __print_th(th);
 144 }
 145 
 146 
 147 /* This routine picks a TCP windows for a socket based on
 148    the following constraints
 149    
 150    1. The window can never be shrunk once it is offered (RFC 793)
 151    2. We limit memory per socket
 152    
 153    For now we use NET2E3's heuristic of offering half the memory
 154    we have handy. All is not as bad as this seems however because
 155    of two things. Firstly we will bin packets even within the window
 156    in order to get the data we are waiting for into the memory limit.
 157    Secondly we bin common duplicate forms at receive time
 158 
 159    Better heuristics welcome
 160 */
 161    
 162 static int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 163 {
 164         int new_window = sk->prot->rspace(sk);
 165 
 166 /*
 167  * two things are going on here.  First, we don't ever offer a
 168  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 169  * receiver side of SWS as specified in RFC1122.
 170  * Second, we always give them at least the window they
 171  * had before, in order to avoid retracting window.  This
 172  * is technically allowed, but RFC1122 advises against it and
 173  * in practice it causes trouble.
 174  */
 175         if (new_window < min(sk->mss, MAX_WINDOW/2) ||
 176             new_window < sk->window)
 177           return(sk->window);
 178         return(new_window);
 179 }
 180 
 181 /* Enter the time wait state. */
 182 
 183 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 184 {
 185   sk->state = TCP_TIME_WAIT;
 186   sk->shutdown = SHUTDOWN_MASK;
 187   if (!sk->dead)
 188         sk->state_change(sk);
 189   reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 190 }
 191 
 192 /*
 193  *      A timer event has trigger a tcp retransmit timeout. The
 194  *      socket xmit queue is ready and set up to send. Because
 195  *      the ack receive code keeps the queue straight we do
 196  *      nothing clever here.
 197  */
 198 
 199 static void
 200 tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 201 {
 202   if (all) {
 203         ip_retransmit(sk, all);
 204         return;
 205   }
 206 
 207   sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 208   /* sk->ssthresh in theory can be zero.  I guess that's OK */
 209   sk->cong_count = 0;
 210 
 211   sk->cong_window = 1;
 212 
 213   /* Do the actual retransmit. */
 214   ip_retransmit(sk, all);
 215 }
 216 
 217 
 218 /*
 219  * This routine is called by the ICMP module when it gets some
 220  * sort of error condition.  If err < 0 then the socket should
 221  * be closed and the error returned to the user.  If err > 0
 222  * it's just the icmp type << 8 | icmp code.  After adjustment
 223  * header points to the first 8 bytes of the tcp header.  We need
 224  * to find the appropriate port.
 225  */
 226 void
 227 tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 228         unsigned long saddr, struct inet_protocol *protocol)
 229 {
 230   struct tcphdr *th;
 231   struct sock *sk;
 232   struct iphdr *iph=(struct iphdr *)header;
 233   
 234   header+=4*iph->ihl;
 235    
 236   DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n",
 237                                         err, header, daddr, saddr, protocol));
 238 
 239   th =(struct tcphdr *)header;
 240   sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 241   print_th(th);
 242 
 243   if (sk == NULL) return;
 244   
 245   if(err<0)
 246   {
 247         sk->err = -err;
 248         sk->error_report(sk);
 249         return;
 250   }
 251 
 252   if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
 253         /*
 254          * FIXME:
 255          * For now we will just trigger a linear backoff.
 256          * The slow start code should cause a real backoff here.
 257          */
 258         if (sk->cong_window > 4) sk->cong_window--;
 259         return;
 260   }
 261 
 262   DPRINTF((DBG_TCP, "TCP: icmp_err got error\n"));
 263   sk->err = icmp_err_convert[err & 0xff].errno;
 264 
 265   /*
 266    * If we've already connected we will keep trying
 267    * until we time out, or the user gives up.
 268    */
 269   if (icmp_err_convert[err & 0xff].fatal) {
 270         if (sk->state == TCP_SYN_SENT) {
 271                 tcp_statistics.TcpAttemptFails++;
 272                 sk->state = TCP_CLOSE;
 273                 sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 274         }
 275   }
 276   return;
 277 }
 278 
 279 
 280 /*
 281  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 282  *      in the received data queue (ie a frame missing that needs sending to us)
      *
      *      sk may be NULL, in which case 0 is returned.  Counting starts at
      *      sk->copied_seq+1 and also stops after a segment carrying PSH once
      *      at least one byte has been counted.  A SYN is allowed for in the
      *      arithmetic but not reported as readable, and one byte is deducted
      *      for pending out-of-line urgent data lying within the counted range.
      *      The queue walk runs with interrupts disabled (cli) and the saved
      *      flags are restored on every exit path.
      *
      *      Returns the number of readable bytes (computed as unsigned long,
      *      returned as int).
      *
      *      NOTE(review): skb->h.th->seq is compared directly with the
      *      host-order counter; presumably the receive path has already
      *      converted it from network order -- confirm.
 283  */
 284 
 285 static int
 286 tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 287 {
 288   unsigned long counted;
 289   unsigned long amount;
 290   struct sk_buff *skb;
 291   int sum;
 292   unsigned long flags;
 293 
 294   DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk));
 295   if(sk && sk->debug)
 296         printk("tcp_readable: %p - ",sk);
 297 
 298   save_flags(flags);
 299   cli();
       /* Nothing to count for a missing socket or an empty receive queue. */
 300   if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 301   {
 302         restore_flags(flags);
 303         if(sk && sk->debug) 
 304                 printk("empty\n");
 305         return(0);
 306   }
 307   
 308   counted = sk->copied_seq+1;   /* Where we are at the moment */
 309   amount = 0;
 310   
 311   /* Do until a push or until we are out of data. */
 312   do {
 313         if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 314                 break;
 315         sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
             /* A SYN takes up one sequence number but carries no user data. */
 316         if (skb->h.th->syn)
 317                 sum++;
             /* A negative sum means this segment lies wholly behind 'counted'. */
 318         if (sum >= 0) {                                 /* Add it up, move on */
 319                 amount += sum;
 320                 if (skb->h.th->syn) amount--;
 321                 counted += sum;
 322         }
 323         if (amount && skb->h.th->psh) break;
 324         skb = skb->next;
             /* The list is circular: stop when we are back at the queue head. */
 325   } while(skb != (struct sk_buff *)&sk->receive_queue);
 326   if (amount && !sk->urginline && sk->urg_data &&
 327       (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 328         amount--;               /* don't count urg data */
 329   restore_flags(flags);
 330   DPRINTF((DBG_TCP, "tcp readable returning %d bytes\n", amount));
 331   if(sk->debug)
 332         printk("got %lu bytes.\n",amount);
 333   return(amount);
 334 }
 335 
 336 
 337 /*
 338  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 339  *      listening socket has a receive queue of sockets to accept.
 340  */
 341 
 342 static int
 343 tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 344 {
 345   DPRINTF((DBG_TCP, "tcp_select(sk=%X, sel_type = %d, wait = %X)\n",
 346                                                 sk, sel_type, wait));
 347 
 348   sk->inuse = 1;
 349   switch(sel_type) {
 350         case SEL_IN:
 351                 if(sk->debug)
 352                         printk("select in");
 353                 select_wait(sk->sleep, wait);
 354                 if(sk->debug)
 355                         printk("-select out");
 356                 if (skb_peek(&sk->receive_queue) != NULL) {
 357                         if (sk->state == TCP_LISTEN || tcp_readable(sk)) {
 358                                 release_sock(sk);
 359                                 if(sk->debug)
 360                                         printk("-select ok data\n");
 361                                 return(1);
 362                         }
 363                 }
 364                 if (sk->err != 0)       /* Receiver error */
 365                 {
 366                         release_sock(sk);
 367                         if(sk->debug)
 368                                 printk("-select ok error");
 369                         return(1);
 370                 }
 371                 if (sk->shutdown & RCV_SHUTDOWN) {
 372                         release_sock(sk);
 373                         if(sk->debug)
 374                                 printk("-select ok down\n");
 375                         return(1);
 376                 } else {
 377                         release_sock(sk);
 378                         if(sk->debug)
 379                                 printk("-select fail\n");
 380                         return(0);
 381                 }
 382         case SEL_OUT:
 383                 select_wait(sk->sleep, wait);
 384                 if (sk->shutdown & SEND_SHUTDOWN) {
 385                         DPRINTF((DBG_TCP,
 386                                 "write select on shutdown socket.\n"));
 387 
 388                         /* FIXME: should this return an error? */
 389                         release_sock(sk);
 390                         return(0);
 391                 }
 392 
 393                 /*
 394                  * FIXME:
 395                  * Hack so it will probably be able to write
 396                  * something if it says it's ok to write.
 397                  */
 398                 if (sk->prot->wspace(sk) >= sk->mss) {
 399                         release_sock(sk);
 400                         /* This should cause connect to work ok. */
 401                         if (sk->state == TCP_SYN_RECV ||
 402                             sk->state == TCP_SYN_SENT) return(0);
 403                         return(1);
 404                 }
 405                 DPRINTF((DBG_TCP,
 406                         "tcp_select: sleeping on write sk->wmem_alloc = %d, "
 407                         "sk->packets_out = %d\n"
 408                         "sk->write_seq = %u, sk->window_seq=%u\n", 
 409                                 sk->wmem_alloc, sk->packets_out,
 410                                 sk->write_seq, sk->window_seq));
 411 
 412                 release_sock(sk);
 413                 return(0);
 414         case SEL_EX:
 415                 select_wait(sk->sleep,wait);
 416                 if (sk->err || sk->urg_data) {
 417                         release_sock(sk);
 418                         return(1);
 419                 }
 420                 release_sock(sk);
 421                 return(0);
 422   }
 423 
 424   release_sock(sk);
 425   return(0);
 426 }
 427 
 428 
 429 int
 430 tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 431 {
 432   int err;
 433   DPRINTF((DBG_TCP, "tcp_ioctl(sk=%X, cmd = %d, arg=%X)\n", sk, cmd, arg));
 434   switch(cmd) {
 435         case DDIOCSDBG:
 436                 return(dbg_ioctl((void *) arg, DBG_TCP));
 437 
 438         case TIOCINQ:
 439 #ifdef FIXME    /* FIXME: */
 440         case FIONREAD:
 441 #endif
 442                 {
 443                         unsigned long amount;
 444 
 445                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 446 
 447                         sk->inuse = 1;
 448                         amount = tcp_readable(sk);
 449                         release_sock(sk);
 450                         DPRINTF((DBG_TCP, "returning %d\n", amount));
 451                         err=verify_area(VERIFY_WRITE,(void *)arg,
 452                                                    sizeof(unsigned long));
 453                         if(err)
 454                                 return err;
 455                         put_fs_long(amount,(unsigned long *)arg);
 456                         return(0);
 457                 }
 458         case SIOCATMARK:
 459                 {
 460                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 461 
 462                         err = verify_area(VERIFY_WRITE,(void *) arg,
 463                                                   sizeof(unsigned long));
 464                         if (err)
 465                                 return err;
 466                         put_fs_long(answ,(int *) arg);
 467                         return(0);
 468                 }
 469         case TIOCOUTQ:
 470                 {
 471                         unsigned long amount;
 472 
 473                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 474                         amount = sk->prot->wspace(sk);
 475                         err=verify_area(VERIFY_WRITE,(void *)arg,
 476                                                    sizeof(unsigned long));
 477                         if(err)
 478                                 return err;
 479                         put_fs_long(amount,(unsigned long *)arg);
 480                         return(0);
 481                 }
 482         default:
 483                 return(-EINVAL);
 484   }
 485 }
 486 
 487 
 488 /* This routine computes a TCP checksum. */
     /*
      * th    - start of the TCP header; 'len' bytes starting here are summed
      * len   - number of bytes to cover
      * saddr - source address; 0 means "use ip_my_addr()"
      * daddr - destination address
      *
      * The sum is seeded with the two addresses plus a word built from the
      * length and IPPROTO_TCP, then the segment is added 32 bits at a time,
      * folded to 16 bits, and any trailing 16-bit word and trailing byte are
      * added.  The return value is the bitwise complement of the low 16
      * bits of that sum.
      *
      * The arithmetic is written in i386 inline assembly (ebx accumulates,
      * esi walks the data).  print_th() is called on the header as a
      * debugging aid.
      */
 489 unsigned short
 490 tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 491           unsigned long saddr, unsigned long daddr)
 492 {     
 493   unsigned long sum;
 494    
 495   if (saddr == 0) saddr = ip_my_addr();
 496   print_th(th);
       /* Seed: sum = daddr + saddr + (length/protocol word), carries added back in. */
 497   __asm__("\t addl %%ecx,%%ebx\n"
 498           "\t adcl %%edx,%%ebx\n"
 499           "\t adcl $0, %%ebx\n"
 500           : "=b"(sum)
 501           : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 502           : "cx","bx","dx" );
 503    
       /* Add len/4 whole 32-bit words; adcl carries from one iteration into the next. */
 504   if (len > 3) {
 505         __asm__("\tclc\n"
 506                 "1:\n"
 507                 "\t lodsl\n"
 508                 "\t adcl %%eax, %%ebx\n"
 509                 "\t loop 1b\n"
 510                 "\t adcl $0, %%ebx\n"
 511                 : "=b"(sum) , "=S"(th)
 512                 : "0"(sum), "c"(len/4) ,"1"(th)
 513                 : "ax", "cx", "bx", "si" );
 514   }
 515    
 516   /* Convert from 32 bits to 16 bits. */
       /* Adds the upper half of ebx into bx; the upper half itself is left as it was. */
 517   __asm__("\t movl %%ebx, %%ecx\n"
 518           "\t shrl $16,%%ecx\n"
 519           "\t addw %%cx, %%bx\n"
 520           "\t adcw $0, %%bx\n"
 521           : "=b"(sum)
 522           : "0"(sum)
 523           : "bx", "cx");
 524    
 525   /* Check for an extra word. */
       /* 'th' was advanced past the 32-bit words by the loop above. */
 526   if ((len & 2) != 0) {
 527         __asm__("\t lodsw\n"
 528                 "\t addw %%ax,%%bx\n"
 529                 "\t adcw $0, %%bx\n"
 530                 : "=b"(sum), "=S"(th)
 531                 : "0"(sum) ,"1"(th)
 532                 : "si", "ax", "bx");
 533   }
 534    
 535   /* Now check for the extra byte. */
       /* The odd byte is zero-extended (ah cleared) before being added. */
 536   if ((len & 1) != 0) {
 537         __asm__("\t lodsb\n"
 538                 "\t movb $0,%%ah\n"
 539                 "\t addw %%ax,%%bx\n"
 540                 "\t adcw $0, %%bx\n"
 541                 : "=b"(sum)
 542                 : "0"(sum) ,"S"(th)
 543                 : "si", "ax", "bx");
 544   }
 545    
 546   /* We only want the bottom 16 bits, but we never cleared the top 16. */
 547   return((~sum) & 0xffff);
 548 }
 549 
 550 
 551 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 552                 unsigned long daddr, int len, struct sock *sk)
 553 {
 554         th->check = 0;
 555         th->check = tcp_check(th, len, saddr, daddr);
 556         return;
 557 }
 558 
 559 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 560 {
 561         int size;
 562         struct tcphdr * th = skb->h.th;
 563 
 564         /* length of packet (not counting length of pre-tcp headers) */
 565         size = skb->len - ((unsigned char *) th - skb->data);
 566 
 567         /* sanity check it.. */
 568         if (size < sizeof(struct tcphdr) || size > skb->len) {
 569                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 570                         skb, skb->data, th, skb->len);
 571                 kfree_skb(skb, FREE_WRITE);
 572                 return;
 573         }
 574 
 575         /* If we have queued a header size packet.. */
 576         if (size == sizeof(struct tcphdr)) {
 577                 /* If its got a syn or fin its notionally included in the size..*/
 578                 if(!th->syn && !th->fin) {
 579                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 580                         kfree_skb(skb,FREE_WRITE);
 581                         return;
 582                 }
 583         }
 584 
 585         tcp_statistics.TcpOutSegs++;  
 586         /* We need to complete and send the packet. */
 587         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 588 
 589         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 590         if (after(skb->h.seq, sk->window_seq) ||
 591             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 592              sk->packets_out >= sk->cong_window) {
 593                 DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
 594                                         sk->cong_window, sk->packets_out));
 595                 DPRINTF((DBG_TCP, "sk->write_seq = %d, sk->window_seq = %d\n",
 596                                         sk->write_seq, sk->window_seq));
 597                 if (skb->next != NULL) {
 598                         printk("tcp_send_partial: next != NULL\n");
 599                         skb_unlink(skb);
 600                 }
 601                 skb_queue_tail(&sk->write_queue, skb);
 602                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 603                     sk->send_head == NULL &&
 604                     sk->ack_backlog == 0)
 605                   reset_timer(sk, TIME_PROBE0, sk->rto);
 606         } else {
 607                 sk->sent_seq = sk->write_seq;
 608                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 609         }
 610 }
 611 
 612 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 613 {
 614         struct sk_buff * skb;
 615         unsigned long flags;
 616 
 617         save_flags(flags);
 618         cli();
 619         skb = sk->partial;
 620         if (skb) {
 621                 sk->partial = NULL;
 622                 del_timer(&sk->partial_timer);
 623         }
 624         restore_flags(flags);
 625         return skb;
 626 }
 627 
 628 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 629 {
 630         struct sk_buff *skb;
 631 
 632         if (sk == NULL)
 633                 return;
 634         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 635                 tcp_send_skb(sk, skb);
 636 }
 637 
 638 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 639 {
 640         struct sk_buff * tmp;
 641         unsigned long flags;
 642 
 643         save_flags(flags);
 644         cli();
 645         tmp = sk->partial;
 646         if (tmp)
 647                 del_timer(&sk->partial_timer);
 648         sk->partial = skb;
 649         sk->partial_timer.expires = HZ;
 650         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 651         sk->partial_timer.data = (unsigned long) sk;
 652         add_timer(&sk->partial_timer);
 653         restore_flags(flags);
 654         if (tmp)
 655                 tcp_send_skb(sk, tmp);
 656 }
 657 
 658 
 659 /* This routine sends an ack and also updates the window. */
 660 static void
 661 tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 662              struct sock *sk,
 663              struct tcphdr *th, unsigned long daddr)
 664 {
 665   struct sk_buff *buff;
 666   struct tcphdr *t1;
 667   struct device *dev = NULL;
 668   int tmp;
 669 
 670   if(sk->zapped)
 671         return;         /* We have been reset, we may not send again */
 672   /*
 673    * We need to grab some memory, and put together an ack,
 674    * and then put it into the queue to be sent.
 675    */
 676   buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 677   if (buff == NULL) {
 678         /* Force it to send an ack. */
 679         sk->ack_backlog++;
 680         if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) {
 681                 reset_timer(sk, TIME_WRITE, 10);
 682         }
 683   if (inet_debug == DBG_SLIP) printk("\rtcp_ack: malloc failed\n");
 684         return;
 685   }
 686 
 687   buff->len = sizeof(struct tcphdr);
 688   buff->sk = sk;
 689   t1 =(struct tcphdr *) buff->data;
 690 
 691   /* Put in the IP header and routing stuff. */
 692   tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 693                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 694   if (tmp < 0) {
 695         buff->free=1;
 696         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 697   if (inet_debug == DBG_SLIP) printk("\rtcp_ack: build_header failed\n");
 698         return;
 699   }
 700   buff->len += tmp;
 701   t1 =(struct tcphdr *)((char *)t1 +tmp);
 702 
 703   /* FIXME: */
 704   memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 705 
 706   /* swap the send and the receive. */
 707   t1->dest = th->source;
 708   t1->source = th->dest;
 709   t1->seq = ntohl(sequence);
 710   t1->ack = 1;
 711   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
 712   t1->window = ntohs(sk->window);
 713   t1->res1 = 0;
 714   t1->res2 = 0;
 715   t1->rst = 0;
 716   t1->urg = 0;
 717   t1->syn = 0;
 718   t1->psh = 0;
 719   t1->fin = 0;
 720   if (ack == sk->acked_seq) {
 721         sk->ack_backlog = 0;
 722         sk->bytes_rcv = 0;
 723         sk->ack_timed = 0;
 724         if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 725                                   && sk->timeout == TIME_WRITE) 
 726         {
 727                 if(sk->keepopen)
 728                         reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 729                 else
 730                         delete_timer(sk);
 731         }
 732   }
 733   t1->ack_seq = ntohl(ack);
 734   t1->doff = sizeof(*t1)/4;
 735   tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 736   if (sk->debug)
 737          printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 738   tcp_statistics.TcpOutSegs++;
 739   sk->prot->queue_xmit(sk, dev, buff, 1);
 740 }
 741 
 742 
 743 /* This routine builds a generic TCP header. */
 744 static int
 745 tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 746 {
 747 
 748   /* FIXME: want to get rid of this. */
 749   memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 750   th->seq = htonl(sk->write_seq);
 751   th->psh =(push == 0) ? 1 : 0;
 752   th->doff = sizeof(*th)/4;
 753   th->ack = 1;
 754   th->fin = 0;
 755   sk->ack_backlog = 0;
 756   sk->bytes_rcv = 0;
 757   sk->ack_timed = 0;
 758   th->ack_seq = htonl(sk->acked_seq);
 759   sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 760   th->window = htons(sk->window);
 761 
 762   return(sizeof(*th));
 763 }
 764 
 765 /*
 766  * This routine copies from a user buffer into a socket,
 767  * and starts the transmit system.
      *
      * Parameters:
      *   sk       - socket to write on.  It is marked in use on entry and
      *              every return path has called release_sock() first.
      *   from     - user-space source buffer, read with memcpy_fromfs().
      *   len      - number of bytes the caller wants to send.
      *   nonblock - if set, return -EAGAIN instead of sleeping while the
      *              connection completes or while no buffer memory is free.
      *   flags    - only MSG_OOB is examined here: the segment is marked
      *              urgent and is sent at once rather than being held back
      *              as a partial packet.
      *
      * Returns the number of bytes copied into send buffers.  If nothing was
      * copied the result is a negative errno: the pending sk->err, -EPIPE,
      * -EAGAIN, -ERESTARTSYS, or the error returned by build_header().
 768  */
 769 static int
 770 tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 771           int len, int nonblock, unsigned flags)
 772 {
 773   int copied = 0;
 774   int copy;
 775   int tmp;
 776   struct sk_buff *skb;
 777   struct sk_buff *send_tmp;
 778   unsigned char *buff;
 779   struct proto *prot;
 780   struct device *dev = NULL;
 781 
 782   DPRINTF((DBG_TCP, "tcp_write(sk=%X, from=%X, len=%d, nonblock=%d, flags=%X)\n",
 783                                         sk, from, len, nonblock, flags));
 784 
       /* Take the socket; it is dropped again around anything that may sleep. */
 785   sk->inuse=1;
 786   prot = sk->prot;
       /* Each pass either tops up the pending partial packet or builds a new segment. */
 787   while(len > 0) {
 788         if (sk->err) {                  /* Stop on an error */
 789                 release_sock(sk);
 790                 if (copied) return(copied);
 791                 tmp = -sk->err;
 792                 sk->err = 0;
 793                 return(tmp);
 794         }
 795 
 796         /* First thing we do is make sure that we are established. */    
 797         if (sk->shutdown & SEND_SHUTDOWN) {
 798                 release_sock(sk);
                     /*
                      * NOTE(review): sk->err is written after release_sock() and
                      * stays EPIPE when a short count is returned, presumably so
                      * the next call reports it via the sk->err test at the top of
                      * the loop -- confirm.
                      */
 799                 sk->err = EPIPE;
 800                 if (copied) return(copied);
 801                 sk->err = 0;
 802                 return(-EPIPE);
 803         }
 804 
 805 
 806         /* Wait for a connection to finish. */
 807         
 808         while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) {
 809                 if (sk->err) {
 810                         release_sock(sk);
 811                         if (copied) return(copied);
 812                         tmp = -sk->err;
 813                         sk->err = 0;
 814                         return(tmp);
 815                 }
 816 
                     /* Any state other than SYN_SENT/SYN_RECV will never become writable. */
 817                 if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) {
 818                         release_sock(sk);
 819                         DPRINTF((DBG_TCP, "tcp_write: return 1\n"));
 820                         if (copied) return(copied);
 821 
 822                         if (sk->err) {
 823                                 tmp = -sk->err;
 824                                 sk->err = 0;
 825                                 return(tmp);
 826                         }
 827 
                             /* SIGPIPE is raised only when sk->keepopen is set. */
 828                         if (sk->keepopen) {
 829                                 send_sig(SIGPIPE, current, 0);
 830                         }
 831                         return(-EPIPE);
 832                 }
 833 
 834                 if (nonblock || copied) {
 835                         release_sock(sk);
 836                         DPRINTF((DBG_TCP, "tcp_write: return 2\n"));
 837                         if (copied) return(copied);
 838                         return(-EAGAIN);
 839                 }
 840 
                     /*
                      * Drop the socket, then re-test the state and go to sleep with
                      * interrupts disabled.  A signal ends the wait; the socket has
                      * already been released at that point.
                      */
 841                 release_sock(sk);
 842                 cli();
 843                 if (sk->state != TCP_ESTABLISHED &&
 844                     sk->state != TCP_CLOSE_WAIT && sk->err == 0) {
 845                         interruptible_sleep_on(sk->sleep);
 846                         if (current->signal & ~current->blocked) {
 847                                 sti();
 848                                 DPRINTF((DBG_TCP, "tcp_write: return 3\n"));
 849                                 if (copied) return(copied);
 850                                 return(-ERESTARTSYS);
 851                         }
 852                 }
 853                 sk->inuse = 1;
 854                 sti();
 855         }
 856 
 857 /*
 858  * The following code can result in copy <= 0 if sk->mss is ever
 859  * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 860  * sk->mtu is constant once SYN processing is finished.  I.e. we
 861  * had better not get here until we've seen his SYN and at least one
 862  * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 863  * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 864  * non-decreasing.  Note that any ioctl to set user_mss must be done
 865  * before the exchange of SYN's.  If the initial ack from the other
 866  * end has a window of 0, max_window and thus mss will both be 0.
 867  */
 868 
 869         /* Now we need to check if we have a half built packet. */
 870         if ((skb = tcp_dequeue_partial(sk)) != NULL) {
 871                 int hdrlen;
 872 
 873                  /* IP header + TCP header */
 874                 hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 875                          + sizeof(struct tcphdr);
 876 
 877                 /* Add more stuff to the end of skb->len */
 878                 if (!(flags & MSG_OOB)) {
                             /* Room left before the payload reaches one mss, capped by what remains to send. */
 879                         copy = min(sk->mss - (skb->len - hdrlen), len);
 880                         /* FIXME: this is really a bug. */
                             /*
                              * NOTE(review): with copy forced to 0 nothing is consumed from
                              * the caller's buffer; if the packet is then put back as the
                              * partial packet, the loop comes straight back here.
                              */
 881                         if (copy <= 0) {
 882                           printk("TCP: **bug**: \"copy\" <= 0!!\n");
 883                           copy = 0;
 884                         }
 885           
 886                         memcpy_fromfs(skb->data + skb->len, from, copy);
 887                         skb->len += copy;
 888                         from += copy;
 889                         copied += copy;
 890                         len -= copy;
 891                         sk->write_seq += copy;
 892                       }
                     /* Send once the payload is a full mss, for urgent data, or when nothing is in flight. */
 893                 if ((skb->len - hdrlen) >= sk->mss ||
 894                     (flags & MSG_OOB) ||
 895                     !sk->packets_out)
 896                         tcp_send_skb(sk, skb);
 897                 else
 898                         tcp_enqueue_partial(skb, sk);
 899                 continue;
 900         }
 901 
 902         /*
 903          * We also need to worry about the window.
 904          * If window < 1/2 the maximum window we've seen from this
 905          *   host, don't use it.  This is sender side
 906          *   silly window prevention, as specified in RFC1122.
 907          *   (Note that this is different than earlier versions of
 908          *   SWS prevention, e.g. RFC813.).  What we actually do is 
 909          *   use the whole MSS.  Since this results in the right
 910          *   edge of the packet being outside the window, it will
 911          *   be queued for later rather than sent.
 912          */
 913 
 914         copy = sk->window_seq - sk->write_seq;
 915         if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
 916                 copy = sk->mss;
 917         if (copy > len)
 918                 copy = len;
 919 
 920   /* We should really check the window here also. */
             /*
              * A non-urgent segment shorter than one mss is a candidate partial
              * packet (send_tmp != NULL).  Its buffer is sized from sk->mtu so
              * that later writes can append to it.
              */
 921         send_tmp = NULL;
 922         if (copy < sk->mss && !(flags & MSG_OOB)) {
 923         /* We will release the socket in case we sleep here. */
 924           release_sock(sk);
 925           /* NB: following must be mtu, because mss can be increased.
 926            * mss is always <= mtu */
 927           skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
 928           sk->inuse = 1;
 929           send_tmp = skb;
 930         } else {
 931                 /* We will release the socket in case we sleep here. */
 932           release_sock(sk);
 933           skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
 934           sk->inuse = 1;
 935         }
 936 
 937         /* If we didn't get any memory, we need to sleep. */
 938         if (skb == NULL) {
 939                 if (nonblock /* || copied */) {
 940                         release_sock(sk);
 941                         DPRINTF((DBG_TCP, "tcp_write: return 4\n"));
 942                         if (copied) return(copied);
 943                         return(-EAGAIN);
 944                 }
 945 
 946                 /* FIXME: here is another race condition. */
                     /*
                      * Snapshot wmem_alloc before releasing the socket; the sleep below
                      * is skipped if the value has dropped by the time it is re-read
                      * with interrupts off.
                      */
 947                 tmp = sk->wmem_alloc;
 948                 release_sock(sk);
 949                 cli();
 950                 /* Again we will try to avoid it. */
 951                 if (tmp <= sk->wmem_alloc &&
 952                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
 953                                 && sk->err == 0) {
 954                         interruptible_sleep_on(sk->sleep);
 955                         if (current->signal & ~current->blocked) {
 956                                 sti();
 957                                 DPRINTF((DBG_TCP, "tcp_write: return 5\n"));
 958                                 if (copied) return(copied);
 959                                 return(-ERESTARTSYS);
 960                         }
 961                 }
 962                 sk->inuse = 1;
 963                 sti();
 964                 continue;
 965         }
 966 
 967         skb->len = 0;
 968         skb->sk = sk;
 969         skb->free = 0;
 970 
 971         buff = skb->data;
 972 
 973         /*
 974          * FIXME: we need to optimize this.
 975          * Perhaps some hints here would be good.
 976          */
 977         tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
 978                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
 979         if (tmp < 0 ) {
 980                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
 981                 release_sock(sk);
 982                 DPRINTF((DBG_TCP, "tcp_write: return 6\n"));
 983                 if (copied) return(copied);
 984                 return(tmp);
 985         }
 986         skb->len += tmp;
 987         skb->dev = dev;
 988         buff += tmp;
 989         skb->h.th =(struct tcphdr *) buff;
             /*
              * len-copy is zero when this segment takes the last of the caller's
              * data; tcp_build_header() sets PSH in exactly that case.
              */
 990         tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
             /* tcp_build_header() in this file always returns sizeof(struct tcphdr). */
 991         if (tmp < 0) {
 992                 prot->wfree(sk, skb->mem_addr, skb->mem_len);
 993                 release_sock(sk);
 994                 DPRINTF((DBG_TCP, "tcp_write: return 7\n"));
 995                 if (copied) return(copied);
 996                 return(tmp);
 997         }
 998 
             /* Urgent data: the urgent pointer is set to the length of this segment's data. */
 999         if (flags & MSG_OOB) {
1000                 ((struct tcphdr *)buff)->urg = 1;
1001                 ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1002         }
1003         skb->len += tmp;
1004         memcpy_fromfs(buff+tmp, from, copy);
1005 
1006         from += copy;
1007         copied += copy;
1008         len -= copy;
1009         skb->len += copy;
1010         skb->free = 0;
1011         sk->write_seq += copy;
1012 
             /* Hold a short segment back as the partial packet while other packets are outstanding. */
1013         if (send_tmp != NULL && sk->packets_out) {
1014                 tcp_enqueue_partial(send_tmp, sk);
1015                 continue;
1016         }
1017         tcp_send_skb(sk, skb);
1018   }
1019   sk->err = 0;
1020 
1021 /*
1022  *      Nagles rule. Turn Nagle off with TCP_NODELAY for highly
1023  *      interactive fast network servers. It's meant to be on and
1024  *      it really improves the throughput though not the echo time
1025  *      on my slow slip link - Alan
1026  */
1027 
1028   /* Avoid possible race on send_tmp - c/o Johannes Stille */
1029   if(sk->partial && 
1030      ((!sk->packets_out) 
1031      /* If not nagling we can send on the before case too.. */
1032       || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1033       ))
1034         tcp_send_partial(sk);
1035   /* -- */
1036   release_sock(sk);
1037   DPRINTF((DBG_TCP, "tcp_write: return 8\n"));
1038   return(copied);
1039 }
1040 
1041 
1042 static int
1043 tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1044            int len, int nonblock, unsigned flags,
1045            struct sockaddr_in *addr, int addr_len)
1046 {
1047   struct sockaddr_in sin;
1048 
1049   if (addr_len < sizeof(sin)) return(-EINVAL);
1050   memcpy_fromfs(&sin, addr, sizeof(sin));
1051   if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL);
1052   if (sin.sin_port != sk->dummy_th.dest) return(-EINVAL);
1053   if (sin.sin_addr.s_addr != sk->daddr) return(-EINVAL);
1054   return(tcp_write(sk, from, len, nonblock, flags));
1055 }
1056 
1057 
1058 static void
1059 tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1060 {
1061   int tmp;
1062   struct device *dev = NULL;
1063   struct tcphdr *t1;
1064   struct sk_buff *buff;
1065 
1066   DPRINTF((DBG_TCP, "in tcp read wakeup\n"));
1067   if (!sk->ack_backlog) return;
1068 
1069   /*
1070    * FIXME: we need to put code here to prevent this routine from
1071    * being called.  Being called once in a while is ok, so only check
1072    * if this is the second time in a row.
1073    */
1074 
1075   /*
1076    * We need to grab some memory, and put together an ack,
1077    * and then put it into the queue to be sent.
1078    */
1079   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1080   if (buff == NULL) {
1081         /* Try again real soon. */
1082         reset_timer(sk, TIME_WRITE, 10);
1083         return;
1084   }
1085 
1086   buff->len = sizeof(struct tcphdr);
1087   buff->sk = sk;
1088 
1089   /* Put in the IP header and routing stuff. */
1090   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1091                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1092   if (tmp < 0) {
1093         buff->free=1;
1094         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1095         return;
1096   }
1097 
1098   buff->len += tmp;
1099   t1 =(struct tcphdr *)(buff->data +tmp);
1100 
1101   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1102   t1->seq = htonl(sk->sent_seq);
1103   t1->ack = 1;
1104   t1->res1 = 0;
1105   t1->res2 = 0;
1106   t1->rst = 0;
1107   t1->urg = 0;
1108   t1->syn = 0;
1109   t1->psh = 0;
1110   sk->ack_backlog = 0;
1111   sk->bytes_rcv = 0;
1112   sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1113   t1->window = ntohs(sk->window);
1114   t1->ack_seq = ntohl(sk->acked_seq);
1115   t1->doff = sizeof(*t1)/4;
1116   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1117   sk->prot->queue_xmit(sk, dev, buff, 1);
1118   tcp_statistics.TcpOutSegs++;
1119 }
1120 
1121 
1122 /*
1123  * FIXME:
1124  * This routine frees used buffers.
1125  * It should consider sending an ACK to let the
1126  * other end know we now have a bigger window.
      *
      * Buffers marked ->used are unlinked from the head of
      * sk->receive_queue and freed, stopping at the first buffer not so
      * marked.  If that changed the value reported by sk->prot->rspace(),
      * sk->ack_backlog is incremented and an ACK is either sent at once
      * through tcp_read_wakeup() or scheduled through the socket timer.
1127  */
1128 static void
1129 cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1130 {
1131   unsigned long flags;
1132   int left;
1133   struct sk_buff *skb;
1134 
1135   if(sk->debug)
1136         printk("cleaning rbuf for sk=%p\n", sk);
1137   
       /* The queue is walked with interrupts disabled. */
1138   save_flags(flags);
1139   cli();
1140   
       /* Receive space before freeing; compared with the value afterwards. */
1141   left = sk->prot->rspace(sk);
1142  
1143   /*
1144    * We have to loop through all the buffer headers,
1145    * and try to free up all the space we can.
1146    */
1147   while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1148   {
1149         if (!skb->used) 
1150                 break;
1151         skb_unlink(skb);
1152         skb->sk = sk;
1153         kfree_skb(skb, FREE_READ);
1154   }
1155 
1156   restore_flags(flags);
1157 
1158   /*
1159    * FIXME:
1160    * At this point we should send an ack if the difference
1161    * in the window, and the amount of space is bigger than
1162    * TCP_WINDOW_DIFF.
1163    */
1164   DPRINTF((DBG_TCP, "sk->window left = %d, sk->prot->rspace(sk)=%d\n",
1165                         sk->window - sk->bytes_rcv, sk->prot->rspace(sk)));
1166 
1167   if(sk->debug)
1168         printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk),
1169                                             left);
1170   if (sk->prot->rspace(sk) != left) 
1171   {
1172         /*
1173          * This area has caused the most trouble.  The current strategy
1174          * is to simply do nothing if the other end has room to send at
1175          * least 3 full packets, because the ack from those will auto-
1176          * matically update the window.  If the other end doesn't think
1177          * we have much space left, but we have room for at least 1 more
1178          * complete packet than it thinks we do, we will send an ack
1179          * immediately.  Otherwise we will wait up to .5 seconds in case
1180          * the user reads some more.
              *
              * NOTE(review): the "3 full packets" case described above has no
              * corresponding test in the code below -- confirm whether the
              * comment or the code is out of date.
1181          */
1182         sk->ack_backlog++;
1183 /*
1184  * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1185  * if the other end is offering a window smaller than the agreed on MSS
1186  * (called sk->mtu here).  In theory there's no connection between send
1187  * and receive, and so no reason to think that they're going to send
1188  * small packets.  For the moment I'm using the hack of reducing the mss
1189  * only on the send side, so I'm putting mtu here.
1190  */
             /* window - bytes_rcv is what the peer still believes it may send. */
1191         if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) {
1192                 /* Send an ack right now. */
1193                 tcp_read_wakeup(sk);
1194         } else {
1195                 /* Force it to send an ack soon. */
                     /*
                      * The timer is taken off its list first.  It is re-armed for
                      * TCP_ACK_TIME if it was idle or if TCP_ACK_TIME is less than
                      * sk->timer.expires; otherwise it is put back unchanged.
                      */
1196                 int was_active = del_timer(&sk->timer);
1197                 if (!was_active || TCP_ACK_TIME < sk->timer.expires) {
1198                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1199                 } else
1200                         add_timer(&sk->timer);
1201         }
1202   }
1203 } 
1204 
1205 
1206 /* Handle reading urgent data. */
     /*
      * At most one byte is returned: the value held in sk->urg_data.
      *
      *   sk       - socket to read from.
      *   nonblock - return -EAGAIN instead of waiting for the urgent byte.
      *   to       - user-space destination, written with put_fs_byte().
      *   len      - caller's buffer length; only tested for being positive.
      *   flags    - with MSG_PEEK the byte is left marked as unread.
      *
      * Returns 1 when a byte was delivered.  Returns 0 when len is not
      * positive, on the first call after the socket reached TCP_CLOSE, or
      * when receive is shut down.  Returns -EINVAL if urgent data is
      * delivered inline, none is pending, or it was already read.
      * Otherwise returns -sk->err, -ENOTCONN, -EAGAIN or -ERESTARTSYS.
      */
1207 static int
1208 tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1209              unsigned char *to, int len, unsigned flags)
1210 {
1211         struct wait_queue wait = { current, NULL };
1212 
1213         while (len > 0) {
1214                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1215                         return -EINVAL;
1216                 if (sk->urg_data & URG_VALID) {
                             /*
                              * The narrowing to char keeps only the low byte --
                              * presumably the data byte, with the URG_* flags
                              * held in higher bits; confirm.
                              */
1217                         char c = sk->urg_data;
1218                         if (!(flags & MSG_PEEK))
1219                                 sk->urg_data = URG_READ;
1220                         put_fs_byte(c, to);
1221                         return 1;
1222                 }
1223 
1224                 if (sk->err) {
1225                         int tmp = -sk->err;
1226                         sk->err = 0;
1227                         return tmp;
1228                 }
1229 
                     /* The first read after close returns 0; later reads return -ENOTCONN. */
1230                 if (sk->state == TCP_CLOSE || sk->done) {
1231                         if (!sk->done) {
1232                                 sk->done = 1;
1233                                 return 0;
1234                         }
1235                         return -ENOTCONN;
1236                 }
1237 
1238                 if (sk->shutdown & RCV_SHUTDOWN) {
1239                         sk->done = 1;
1240                         return 0;
1241                 }
1242 
1243                 if (nonblock)
1244                         return -EAGAIN;
1245 
1246                 if (current->signal & ~current->blocked)
1247                         return -ERESTARTSYS;
1248 
                     /*
                      * The task is marked interruptible and queued on sk->sleep
                      * before the condition is re-tested; schedule() is only
                      * called while the urgent byte is still outstanding.
                      */
1249                 current->state = TASK_INTERRUPTIBLE;
1250                 add_wait_queue(sk->sleep, &wait);
1251                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1252                     !(sk->shutdown & RCV_SHUTDOWN))
1253                         schedule();
1254                 remove_wait_queue(sk->sleep, &wait);
1255                 current->state = TASK_RUNNING;
1256         }
1257         return 0;
1258 }
1259 
1260 
1261 /* This routine copies from a sock struct into the user buffer. */
     /*
      *   sk       - socket to read from; marked in use while the receive
      *              queue is examined and released before returning.
      *   to       - user-space destination, checked with verify_area().
      *   len      - size of the destination; 0 returns 0, negative -EINVAL.
      *   nonblock - return -EAGAIN instead of sleeping when no data is ready.
      *   flags    - MSG_OOB hands the call to tcp_read_urg().  With MSG_PEEK
      *              a private copy of the sequence position is advanced and
      *              buffers are not marked used.
      *
      * Returns the number of bytes copied.  If nothing was copied, returns
      * 0 at end of stream (first read after TCP_CLOSE, or receive shut
      * down) or a negative errno: -sk->err, -ENOTCONN, -EAGAIN or
      * -ERESTARTSYS.
      */
1262 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1263         int len, int nonblock, unsigned flags)
1264 {
1265         struct wait_queue wait = { current, NULL };
1266         int copied = 0;
1267         unsigned long peek_seq;
1268         unsigned long *seq;
1269         unsigned long used;
1270         int err;
1271 
1272         if (len == 0)
1273                 return 0;
1274 
1275         if (len < 0)
1276                 return -EINVAL;
1277 
1278         err = verify_area(VERIFY_WRITE, to, len);
1279         if (err)
1280                 return err;
1281 
1282         /* This error should be checked. */
1283         if (sk->state == TCP_LISTEN)
1284                 return -ENOTCONN;
1285 
1286         /* Urgent data needs to be handled specially. */
1287         if (flags & MSG_OOB)
1288                 return tcp_read_urg(sk, nonblock, to, len, flags);
1289 
             /* seq points at the position to advance: the socket's own, or a scratch copy when peeking. */
1290         peek_seq = sk->copied_seq;
1291         seq = &sk->copied_seq;
1292         if (flags & MSG_PEEK)
1293                 seq = &peek_seq;
1294 
1295         add_wait_queue(sk->sleep, &wait);
1296         sk->inuse = 1;
1297         while (len > 0) {
1298                 struct sk_buff * skb;
1299                 unsigned long offset;
1300         
1301                 /*
1302                  * are we at urgent data? Stop if we have read anything.
1303                  */
1304                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1305                         break;
1306 
                     /* The state is set before the queue is scanned; schedule() below is reached only if nothing was found. */
1307                 current->state = TASK_INTERRUPTIBLE;
1308 
                     /*
                      * Walk the receive queue for the buffer holding sequence
                      * number *seq + 1.  Buffers lying wholly before that point
                      * are marked used (unless peeking) so cleanup_rbuf() can
                      * free them.
                      */
1309                 skb = skb_peek(&sk->receive_queue);
1310                 do {
1311                         if (!skb)
1312                                 break;
1313                         if (before(1+*seq, skb->h.th->seq))
1314                                 break;
1315                         offset = 1 + *seq - skb->h.th->seq;
1316                         if (skb->h.th->syn)
1317                                 offset--;
1318                         if (offset < skb->len)
1319                                 goto found_ok_skb;
1320                         if (!(flags & MSG_PEEK))
1321                                 skb->used = 1;
1322                         skb = skb->next;
1323                 } while (skb != (struct sk_buff *)&sk->receive_queue);
1324 
                     /* Nothing usable is queued: return what we have, or decide why nothing is coming. */
1325                 if (copied)
1326                         break;
1327 
1328                 if (sk->err) {
1329                         copied = -sk->err;
1330                         sk->err = 0;
1331                         break;
1332                 }
1333 
1334                 if (sk->state == TCP_CLOSE) {
1335                         if (!sk->done) {
1336                                 sk->done = 1;
1337                                 break;
1338                         }
1339                         copied = -ENOTCONN;
1340                         break;
1341                 }
1342 
1343                 if (sk->shutdown & RCV_SHUTDOWN) {
1344                         sk->done = 1;
1345                         break;
1346                 }
1347                         
1348                 if (nonblock) {
1349                         copied = -EAGAIN;
1350                         break;
1351                 }
1352 
                     /* Free consumed buffers and drop the socket before sleeping. */
1353                 cleanup_rbuf(sk);
1354                 release_sock(sk);
1355                 schedule();
1356                 sk->inuse = 1;
1357 
1358                 if (current->signal & ~current->blocked) {
1359                         copied = -ERESTARTSYS;
1360                         break;
1361                 }
1362                 continue;
1363 
1364         found_ok_skb:
1365                 /* Ok so how much can we use ? */
1366                 used = skb->len - offset;
1367                 if (len < used)
1368                         used = len;
1369                 /* do we have urgent data here? */
1370                 if (sk->urg_data) {
                             /*
                              * Unsigned arithmetic: an urgent mark behind the read
                              * position wraps to a large value and fails the test.
                              */
1371                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1372                         if (urg_offset < used) {
                                     /*
                                      * Sitting on the urgent byte: step over it unless it
                                      * is delivered inline.  Otherwise stop the copy just
                                      * short of it.
                                      */
1373                                 if (!urg_offset) {
1374                                         if (!sk->urginline) {
1375                                                 ++*seq;
1376                                                 offset++;
1377                                                 used--;
1378                                         }
1379                                 } else
1380                                         used = urg_offset;
1381                         }
1382                 }
1383                 /* Copy it */
                     /* The payload starts doff*4 bytes past the start of the TCP header. */
1384                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1385                         skb->h.th->doff*4 + offset, used);
1386                 copied += used;
1387                 len -= used;
1388                 to += used;
1389                 *seq += used;
                     /* Once the socket's own read position has passed the urgent mark, forget it. */
1390                 if (after(sk->copied_seq+1,sk->urg_seq))
1391                         sk->urg_data = 0;
1392                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1393                         skb->used = 1;
1394         }
1395         remove_wait_queue(sk->sleep, &wait);
1396         current->state = TASK_RUNNING;
1397 
1398         /* Clean up data we have read: This will do ACK frames */
1399         cleanup_rbuf(sk);
1400         release_sock(sk);
1401         DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
1402         return copied;
1403 }
1404 
1405  
1406 /*
1407  * Send a FIN without closing the connection.
1408  * Not called at interrupt time.
      *
      *   sk  - socket to shut down for sending.
      *   how - shutdown mask; nothing is done unless SEND_SHUTDOWN is set.
      *
      * Returns immediately if the socket is already in TCP_FIN_WAIT1 or
      * TCP_FIN_WAIT2.  Otherwise any partial packet is flushed and a FIN
      * segment is built.  The FIN is transmitted at once, or appended to
      * sk->write_queue if data is still queued there.  sk->write_seq is
      * advanced by one for the FIN.  If no buffer can be allocated the
      * function returns without sending a FIN or changing state.
1409  */
1410 void
1411 tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1412 {
1413   struct sk_buff *buff;
1414   struct tcphdr *t1, *th;
1415   struct proto *prot;
1416   int tmp;
1417   struct device *dev = NULL;
1418 
1419   /*
1420    * We need to grab some memory, and put together a FIN,
1421    * and then put it into the queue to be sent.
1422    * FIXME:
1423    *    Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1424    *    Most of this is guesswork, so maybe it will work...
1425    */
1426   /* If we've already sent a FIN, return. */
1427   if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) return;
1428   if (!(how & SEND_SHUTDOWN)) return;
1429   sk->inuse = 1;
1430 
1431   /* Clear out any half completed packets. */
1432   if (sk->partial)
1433         tcp_send_partial(sk);
1434 
1435   prot =(struct proto *)sk->prot;
1436   th =(struct tcphdr *)&sk->dummy_th;
1437   release_sock(sk); /* in case the malloc sleeps. */
1438   buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
       /* The socket was released above, so a failed allocation just returns. */
1439   if (buff == NULL) return;
1440   sk->inuse = 1;
1441 
1442   DPRINTF((DBG_TCP, "tcp_shutdown_send buff = %X\n", buff));
1443   buff->sk = sk;
1444   buff->len = sizeof(*t1);
1445   t1 =(struct tcphdr *) buff->data;
1446 
1447   /* Put in the IP header and routing stuff. */
1448   tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1449                            IPPROTO_TCP, sk->opt,
1450                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1451   if (tmp < 0) {
1452         /* Finish anyway, treat this as a send that got lost. */
1453         buff->free=1;
1454         prot->wfree(sk,buff->mem_addr, buff->mem_len);
1455         if(sk->state==TCP_ESTABLISHED)
1456                 sk->state=TCP_FIN_WAIT1;
1457         else
1458                 sk->state=TCP_FIN_WAIT2;
1459         release_sock(sk);
1460         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
1461         return;
1462   }
1463 
       /* The TCP header follows the tmp bytes written by build_header(). */
1464   t1 =(struct tcphdr *)((char *)t1 +tmp);
1465   buff->len += tmp;
1466   buff->dev = dev;
1467   memcpy(t1, th, sizeof(*t1));
1468   t1->seq = ntohl(sk->write_seq);
       /* write_seq is advanced by one for the FIN; buff->h.seq records the new value. */
1469   sk->write_seq++;
1470   buff->h.seq = sk->write_seq;
1471   t1->ack = 1;
1472   t1->ack_seq = ntohl(sk->acked_seq);
1473   t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1474   t1->fin = 1;
1475   t1->rst = 0;
1476   t1->doff = sizeof(*t1)/4;
1477   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1478 
1479   /*
1480    * Can't just queue this up.
1481    * It should go at the end of the write queue.
1482    */
1483   if (skb_peek(&sk->write_queue) != NULL) {
1484         buff->free=0;
1485         if (buff->next != NULL) {
1486                 printk("tcp_shutdown: next != NULL\n");
1487                 skb_unlink(buff);
1488         }
1489         skb_queue_tail(&sk->write_queue, buff);
1490   } else {
1491         sk->sent_seq = sk->write_seq;
1492         sk->prot->queue_xmit(sk, dev, buff, 0);
1493   }
1494 
       /*
        * NOTE(review): every state other than TCP_ESTABLISHED (TCP_CLOSE_WAIT
        * included) moves to TCP_FIN_WAIT2 here -- confirm this is intended.
        */
1495   if (sk->state == TCP_ESTABLISHED) sk->state = TCP_FIN_WAIT1;
1496     else sk->state = TCP_FIN_WAIT2;
1497 
1498   release_sock(sk);
1499 }
1500 
1501 
1502 static int
1503 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1504              int to_len, int nonblock, unsigned flags,
1505              struct sockaddr_in *addr, int *addr_len)
1506 {
1507   struct sockaddr_in sin;
1508   int len;
1509   int err;
1510   int result;
1511   
1512   /* Have to check these first unlike the old code. If 
1513      we check them after we lose data on an error
1514      which is wrong */
1515   err = verify_area(VERIFY_WRITE,addr_len,sizeof(long));
1516   if(err)
1517         return err;
1518   len = get_fs_long(addr_len);
1519   if(len > sizeof(sin))
1520         len = sizeof(sin);
1521   err=verify_area(VERIFY_WRITE, addr, len);  
1522   if(err)
1523         return err;
1524         
1525   result=tcp_read(sk, to, to_len, nonblock, flags);
1526 
1527   if (result < 0) return(result);
1528   
1529   sin.sin_family = AF_INET;
1530   sin.sin_port = sk->dummy_th.dest;
1531   sin.sin_addr.s_addr = sk->daddr;
1532 
1533   memcpy_tofs(addr, &sin, len);
1534   put_fs_long(len, addr_len);
1535   return(result);
1536 }
1537 
1538 
1539 /* This routine will send an RST to the other tcp. */
1540 static void
1541 tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1542           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1543 {
1544   struct sk_buff *buff;
1545   struct tcphdr *t1;
1546   int tmp;
1547   struct device *ndev=NULL;
1548   
1549   /*
1550    * We need to grab some memory, and put together an RST,
1551    * and then put it into the queue to be sent.
1552    */
1553   buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1554   if (buff == NULL) 
1555         return;
1556 
1557   DPRINTF((DBG_TCP, "tcp_reset buff = %X\n", buff));
1558   buff->len = sizeof(*t1);
1559   buff->sk = NULL;
1560   buff->dev = dev;
1561 
1562   t1 =(struct tcphdr *) buff->data;
1563 
1564   /* Put in the IP header and routing stuff. */
1565   tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1566                            sizeof(struct tcphdr),tos,ttl);
1567   if (tmp < 0) {
1568         buff->free = 1;
1569         prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1570         return;
1571   }
1572   t1 =(struct tcphdr *)((char *)t1 +tmp);
1573   buff->len += tmp;
1574   memcpy(t1, th, sizeof(*t1));
1575 
1576   /* Swap the send and the receive. */
1577   t1->dest = th->source;
1578   t1->source = th->dest;
1579   t1->rst = 1;  
1580   t1->window = 0;
1581   
1582   if(th->ack)
1583   {
1584         t1->ack = 0;
1585         t1->seq = th->ack_seq;
1586         t1->ack_seq = 0;
1587   }
1588   else
1589   {
1590         t1->ack = 1;
1591         if(!th->syn)
1592                 t1->ack_seq=htonl(th->seq);
1593         else
1594                 t1->ack_seq=htonl(th->seq+1);
1595         t1->seq=0;
1596   }
1597 
1598   t1->syn = 0;
1599   t1->urg = 0;
1600   t1->fin = 0;
1601   t1->psh = 0;
1602   t1->doff = sizeof(*t1)/4;
1603   tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1604   prot->queue_xmit(NULL, dev, buff, 1);
1605   tcp_statistics.TcpOutSegs++;
1606 }
1607 
1608 
1609 /*
1610  *      Look for tcp options. Parses everything but only knows about MSS.
1611  *      This routine is always called with the packet containing the SYN.
1612  *      However it may also be called with the ack to the SYN.  So you
1613  *      can't assume this is always the SYN.  It's always called after
1614  *      we have set up sk->mtu to our own MTU.
1615  */
1616  
1617 static void
1618 tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1619 {
1620   unsigned char *ptr;
1621   int length=(th->doff*4)-sizeof(struct tcphdr);
1622   int mss_seen = 0;
1623     
1624   ptr = (unsigned char *)(th + 1);
1625   
1626   while(length>0)
1627   {
1628         int opcode=*ptr++;
1629         int opsize=*ptr++;
1630         switch(opcode)
1631         {
1632                 case TCPOPT_EOL:
1633                         return;
1634                 case TCPOPT_NOP:
1635                         length-=2;
1636                         continue;
1637                 
1638                 default:
1639                         if(opsize<=2)   /* Avoid silly options looping forever */
1640                                 return;
1641                         switch(opcode)
1642                         {
1643                                 case TCPOPT_MSS:
1644                                         if(opsize==4 && th->syn)
1645                                         {
1646                                                 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1647                                                 mss_seen = 1;
1648                                         }
1649                                         break;
1650                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1651                         }
1652                         ptr+=opsize-2;
1653                         length-=opsize;
1654         }
1655   }
1656   if (th->syn) {
1657     if (! mss_seen)
1658       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1659   }
1660   sk->mss = min(sk->max_window, sk->mtu);
1661 }
1662 
/*
 * Return the classful network mask for dst.  dst is taken in network
 * byte order and the mask is returned in network byte order.  Anything
 * that is neither class A nor class B gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
        unsigned long host_order = ntohl(dst);
        unsigned long net;

        if (IN_CLASSA(host_order))
                net = IN_CLASSA_NET;
        else if (IN_CLASSB(host_order))
                net = IN_CLASSB_NET;
        else
                net = IN_CLASSC_NET;

        return htonl(net);
}
1672 
1673 /*
1674  * This routine handles a connection request.
1675  * It should make sure we haven't already responded.
1676  * Because of the way BSD works, we have to send a syn/ack now.
1677  * This also means it will be harder to close a socket which is
1678  * listening.
      *
      * sk     - socket the request arrived on; it is copied wholesale into
      *          the new socket and its ack_backlog is incremented on success.
      * skb    - the received buffer; skb->h.th is the TCP header.  On success
      *          it is queued on sk->receive_queue with skb->sk pointing at
      *          the new socket, on every failure path it is freed.
      * daddr, saddr - addresses as seen in the incoming packet; they are
      *          swapped when stored in the new socket.
      * opt    - only used for the reset sent when sk is dead (and for debug).
      * dev    - used for its pa_mask and mtu and as the device handed to
      *          queue_xmit().
1679  */
1680 static void
1681 tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1682                  unsigned long daddr, unsigned long saddr,
1683                  struct options *opt, struct device *dev)
1684 {
1685   struct sk_buff *buff;
1686   struct tcphdr *t1;
1687   unsigned char *ptr;
1688   struct sock *newsk;
1689   struct tcphdr *th;
1690   struct device *ndev=NULL;
1691   int tmp;
1692 
1693   DPRINTF((DBG_TCP, "tcp_conn_request(sk = %X, skb = %X, daddr = %X, sadd4= %X, \n"
1694           "                  opt = %X, dev = %X)\n",
1695           sk, skb, daddr, saddr, opt, dev));
1696   
1697   th = skb->h.th;
1698 
1699   /* If the socket is dead, don't accept the connection. */
       /* A live socket gets its data_ready callback invoked before any of
          the checks below; a dead one answers with a reset. */
1700   if (!sk->dead) {
1701         sk->data_ready(sk,0);
1702   } else {
1703         DPRINTF((DBG_TCP, "tcp_conn_request on dead socket\n"));
1704         tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1705         tcp_statistics.TcpAttemptFails++;
1706         kfree_skb(skb, FREE_READ);
1707         return;
1708   }
1709 
1710   /*
1711    * Make sure we can accept more.  This will prevent a
1712    * flurry of syns from eating up all our memory.
1713    */
1714   if (sk->ack_backlog >= sk->max_ack_backlog) {
1715         tcp_statistics.TcpAttemptFails++;
1716         kfree_skb(skb, FREE_READ);
1717         return;
1718   }
1719 
1720   /*
1721    * We need to build a new sock struct.
1722    * It is sort of bad to have a socket without an inode attached
1723    * to it, but the wake_up's will just wake up the listening socket,
1724    * and if the listening socket is destroyed before this is taken
1725    * off of the queue, this will take care of it.
1726    */
1727   newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1728   if (newsk == NULL) {
1729         /* just ignore the syn.  It will get retransmitted. */
1730         tcp_statistics.TcpAttemptFails++;
1731         kfree_skb(skb, FREE_READ);
1732         return;
1733   }
1734 
1735   DPRINTF((DBG_TCP, "newsk = %X\n", newsk));
       /* Start from a byte copy of the listening socket, then reset every
          field that must not be shared with it. */
1736   memcpy(newsk, sk, sizeof(*newsk));
1737   skb_queue_head_init(&newsk->write_queue);
1738   skb_queue_head_init(&newsk->receive_queue);
1739   newsk->send_head = NULL;
1740   newsk->send_tail = NULL;
1741   skb_queue_head_init(&newsk->back_log);
1742   newsk->rtt = TCP_CONNECT_TIME << 3;
1743   newsk->rto = TCP_CONNECT_TIME;
1744   newsk->mdev = 0;
1745   newsk->max_window = 0;
1746   newsk->cong_window = 1;
1747   newsk->cong_count = 0;
1748   newsk->ssthresh = 0;
1749   newsk->backoff = 0;
1750   newsk->blog = 0;
1751   newsk->intr = 0;
1752   newsk->proc = 0;
1753   newsk->done = 0;
1754   newsk->partial = NULL;
1755   newsk->pair = NULL;
1756   newsk->wmem_alloc = 0;
1757   newsk->rmem_alloc = 0;
1758 
1759   newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1760 
1761   newsk->err = 0;
1762   newsk->shutdown = 0;
1763   newsk->ack_backlog = 0;
       /* Receive side starts from the peer's SYN sequence number; the SYN
          itself accounts for the +1 in acked_seq. */
1764   newsk->acked_seq = skb->h.th->seq+1;
1765   newsk->fin_seq = skb->h.th->seq;
1766   newsk->copied_seq = skb->h.th->seq;
1767   newsk->state = TCP_SYN_RECV;
1768   newsk->timeout = 0;
       /* Initial send sequence number is derived from the jiffies clock. */
1769   newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
1770   newsk->window_seq = newsk->write_seq;
1771   newsk->rcv_ack_seq = newsk->write_seq;
1772   newsk->urg_data = 0;
1773   newsk->retransmits = 0;
1774   newsk->destroy = 0;
1775   newsk->timer.data = (unsigned long)newsk;
1776   newsk->timer.function = &net_timer;
       /* Ports are taken from the incoming header, swapped. */
1777   newsk->dummy_th.source = skb->h.th->dest;
1778   newsk->dummy_th.dest = skb->h.th->source;
1779 
1780   /* Swap these two, they are from our point of view. */
1781   newsk->daddr = saddr;
1782   newsk->saddr = daddr;
1783 
1784   put_sock(newsk->num,newsk);
1785   newsk->dummy_th.res1 = 0;
1786   newsk->dummy_th.doff = 6;
1787   newsk->dummy_th.fin = 0;
1788   newsk->dummy_th.syn = 0;
1789   newsk->dummy_th.rst = 0;
1790   newsk->dummy_th.psh = 0;
1791   newsk->dummy_th.ack = 0;
1792   newsk->dummy_th.urg = 0;
1793   newsk->dummy_th.res2 = 0;
       /* NOTE(review): the next two assignments repeat the values already
          stored above. */
1794   newsk->acked_seq = skb->h.th->seq + 1;
1795   newsk->copied_seq = skb->h.th->seq;
1796 
1797   /* Grab the ttl and tos values and use them */
1798   newsk->ip_ttl=sk->ip_ttl;
1799   newsk->ip_tos=skb->ip_hdr->tos;
1800 
1801 /* use 512 or whatever user asked for */
1802 /* note use of sk->user_mss, since user has no direct access to newsk */
       /* Without a user setting: a peer on another network (per the mask
          test below) gets 576 - HEADER_SIZE, otherwise MAX_WINDOW. */
1803   if (sk->user_mss)
1804     newsk->mtu = sk->user_mss;
1805   else {
1806 #ifdef SUBNETSARELOCAL
1807     if ((saddr ^ daddr) & default_mask(saddr))
1808 #else
1809     if ((saddr ^ daddr) & dev->pa_mask)
1810 #endif
1811       newsk->mtu = 576 - HEADER_SIZE;
1812     else
1813       newsk->mtu = MAX_WINDOW;
1814   }
1815 /* but not bigger than device MTU */
1816   newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
1817 
1818 /* this will min with what arrived in the packet */
1819   tcp_options(newsk,skb->h.th);
1820 
       /* Allocate the buffer for the SYN/ACK reply, charged to newsk. */
1821   buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
1822   if (buff == NULL) {
             /* NOTE(review): the error is stored on the listening socket
                and with a negative sign; confirm readers of sk->err expect
                that.  Unlike the failure path further down, skb->sk is not
                set before the skb is freed. */
1823         sk->err = -ENOMEM;
1824         newsk->dead = 1;
1825         release_sock(newsk);
1826         kfree_skb(skb, FREE_READ);
1827         tcp_statistics.TcpAttemptFails++;
1828         return;
1829   }
1830   
       /* TCP header plus the 4 byte MSS option written below. */
1831   buff->len = sizeof(struct tcphdr)+4;
1832   buff->sk = newsk;
1833   
1834   t1 =(struct tcphdr *) buff->data;
1835 
1836   /* Put in the IP header and routing stuff. */
1837   tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
1838                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
1839 
1840   /* Something went wrong. */
1841   if (tmp < 0) {
1842         sk->err = tmp;
1843         buff->free=1;
1844         kfree_skb(buff,FREE_WRITE);
1845         newsk->dead = 1;
1846         release_sock(newsk);
1847         skb->sk = sk;
1848         kfree_skb(skb, FREE_READ);
1849         tcp_statistics.TcpAttemptFails++;
1850         return;
1851   }
1852 
       /* tmp is the number of header bytes build_header() put in front of
          the TCP header. */
1853   buff->len += tmp;
1854   t1 =(struct tcphdr *)((char *)t1 +tmp);
1855   
       /* Use the incoming header as a template and overwrite what differs. */
1856   memcpy(t1, skb->h.th, sizeof(*t1));
1857   buff->h.seq = newsk->write_seq;
1858 
1859   /* Swap the send and the receive. */
1860   t1->dest = skb->h.th->source;
1861   t1->source = newsk->dummy_th.source;
       /* The SYN consumes one sequence number, hence the post-increment. */
1862   t1->seq = ntohl(newsk->write_seq++);
1863   t1->ack = 1;
1864   newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
1865   newsk->sent_seq = newsk->write_seq;
1866   t1->window = ntohs(newsk->window);
1867   t1->res1 = 0;
1868   t1->res2 = 0;
1869   t1->rst = 0;
1870   t1->urg = 0;
1871   t1->psh = 0;
1872   t1->syn = 1;
1873   t1->ack_seq = ntohl(skb->h.th->seq+1);
       /* One extra 32 bit word of header for the option below. */
1874   t1->doff = sizeof(*t1)/4+1;
1875 
       /* Option kind 2, length 4, followed by newsk->mtu as two bytes,
          high byte first. */
1876   ptr =(unsigned char *)(t1+1);
1877   ptr[0] = 2;
1878   ptr[1] = 4;
1879   ptr[2] = ((newsk->mtu) >> 8) & 0xff;
1880   ptr[3] =(newsk->mtu) & 0xff;
1881 
1882   tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
       /* NOTE(review): build_header() was given &ndev, but the segment is
          queued on the caller's dev and ndev is never read. */
1883   newsk->prot->queue_xmit(newsk, dev, buff, 0);
1884 
1885   reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_CONNECT_TIME);
1886   skb->sk = newsk;
1887 
1888   /* Charge the sock_buff to newsk. */
1889   sk->rmem_alloc -= skb->mem_len;
1890   newsk->rmem_alloc += skb->mem_len;
1891 
       /* The request skb, now owned by newsk, waits on the listening
          socket's receive queue. */
1892   skb_queue_tail(&sk->receive_queue,skb);
1893   sk->ack_backlog++;
1894   release_sock(newsk);
1895   tcp_statistics.TcpOutSegs++;
1896 }
1897 
1898 
     /*
      * Close a TCP socket.
      *
      * Marks the socket in use and fully shut down, throws away everything
      * on the receive queue, pushes out any partial packet and then acts on
      * the current state.  In CLOSE_WAIT, ESTABLISHED, SYN_SENT and SYN_RECV
      * a FIN segment is built and either transmitted or appended to the
      * write queue.  Every path ends with release_sock(sk).
      *
      * sk      - the socket being closed.
      * timeout - when non-zero, FIN_WAIT1/FIN_WAIT2/LAST_ACK call
      *           tcp_time_wait() and TIME_WAIT goes straight to TCP_CLOSE.
      */
1899 static void
1900 tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
1901 {
1902   struct sk_buff *buff;
       /* Set when unread data is discarded; copied into the RST bit of the
          outgoing FIN segment. */
1903   int need_reset = 0;
1904   struct tcphdr *t1, *th;
1905   struct proto *prot;
1906   struct device *dev=NULL;
1907   int tmp;
1908 
1909   /*
1910    * We need to grab some memory, and put together a FIN,
1911    * and then put it into the queue to be sent.
1912    */
1913   DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));
1914   sk->inuse = 1;
1915   sk->keepopen = 1;
1916   sk->shutdown = SHUTDOWN_MASK;
1917 
1918   if (!sk->dead) 
1919         sk->state_change(sk);
1920 
1921   /* We need to flush the recv. buffs. */
1922   if (skb_peek(&sk->receive_queue) != NULL) 
1923   {
1924         struct sk_buff *skb;
1925         if(sk->debug)
1926                 printk("Clean rcv queue\n");
1927         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
1928         {
                     /* A non-empty buffer reaching beyond copied_seq means
                        data is being discarded. */
1929                 if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
1930                                 need_reset = 1;
1931                 kfree_skb(skb, FREE_READ);
1932         }
1933         if(sk->debug)
1934                 printk("Cleaned.\n");
1935   }
1936 
1937   /* Get rid off any half-completed packets. */
1938   if (sk->partial) {
1939         tcp_send_partial(sk);
1940   }
1941 
1942   switch(sk->state) {
1943         case TCP_FIN_WAIT1:
1944         case TCP_FIN_WAIT2:
1945         case TCP_LAST_ACK:
1946                 /* start a timer. */
1947                 /* original code was 4 * sk->rtt.  In converting to the
1948                  * new rtt representation, we can't quite use that.
1949                  * it seems to make most sense to  use the backed off value
1950                  */
1951                 reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
1952                 if (timeout) tcp_time_wait(sk);
1953                 release_sock(sk);
1954                 return; /* break causes a double release - messy */
1955         case TCP_TIME_WAIT:
1956                 if (timeout) {
1957                   sk->state = TCP_CLOSE;
1958                 }
1959                 release_sock(sk);
1960                 return;
1961         case TCP_LISTEN:
1962                 sk->state = TCP_CLOSE;
1963                 release_sock(sk);
1964                 return;
1965         case TCP_CLOSE:
1966                 release_sock(sk);
1967                 return;
1968         case TCP_CLOSE_WAIT:
1969         case TCP_ESTABLISHED:
1970         case TCP_SYN_SENT:
1971         case TCP_SYN_RECV:
                     /* Build a FIN from the socket's template header. */
1972                 prot =(struct proto *)sk->prot;
1973                 th =(struct tcphdr *)&sk->dummy_th;
1974                 buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
1975                 if (buff == NULL) {
1976                         /* This will force it to try again later. */
1977                         /* Or it would have if someone released the socket
1978                            first. Anyway it might work now */
                             /* NOTE(review): sk is still written to and its
                                timer re-armed after release_sock(sk);
                                confirm release_sock() cannot free sk here. */
1979                         release_sock(sk);
1980                         if (sk->state != TCP_CLOSE_WAIT)
1981                                         sk->state = TCP_ESTABLISHED;
1982                         reset_timer(sk, TIME_CLOSE, 100);
1983                         return;
1984                 }
1985                 buff->sk = sk;
1986                 buff->free = 1;
1987                 buff->len = sizeof(*t1);
1988                 t1 =(struct tcphdr *) buff->data;
1989 
1990                 /* Put in the IP header and routing stuff. */
1991                 tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1992                                          IPPROTO_TCP, sk->opt,
1993                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1994                 if (tmp < 0) {
                             /* No FIN can be sent: advance the state anyway
                                and let the close timer take over. */
1995                         kfree_skb(buff,FREE_WRITE);
1996                         if(sk->state==TCP_ESTABLISHED)
1997                                 sk->state=TCP_FIN_WAIT1;
1998                         else
1999                                 sk->state=TCP_FIN_WAIT2;
2000                         reset_timer(sk, TIME_CLOSE,4*sk->rto);
2001                         if(timeout)
2002                                 tcp_time_wait(sk);
2003 
2004                         DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
2005                         release_sock(sk);
2006                         return;
2007                 }
2008 
                     /* The TCP header sits tmp bytes into the buffer. */
2009                 t1 =(struct tcphdr *)((char *)t1 +tmp);
2010                 buff->len += tmp;
2011                 buff->dev = dev;
2012                 memcpy(t1, th, sizeof(*t1));
                     /* The FIN uses up one sequence number; buff->h.seq
                        holds the value after the increment. */
2013                 t1->seq = ntohl(sk->write_seq);
2014                 sk->write_seq++;
2015                 buff->h.seq = sk->write_seq;
2016                 t1->ack = 1;
2017 
2018                 /* Ack everything immediately from now on. */
2019                 sk->delay_acks = 0;
2020                 t1->ack_seq = ntohl(sk->acked_seq);
2021                 t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2022                 t1->fin = 1;
2023                 t1->rst = need_reset;
2024                 t1->doff = sizeof(*t1)/4;
2025                 tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2026 
2027                 tcp_statistics.TcpOutSegs++;
2028                 
                     /* Transmit at once when nothing is waiting on the write
                        queue, otherwise append the FIN behind the pending
                        data. */
2029                 if (skb_peek(&sk->write_queue) == NULL) {
2030                         sk->sent_seq = sk->write_seq;
2031                         prot->queue_xmit(sk, dev, buff, 0);
2032                 } else {
2033                         reset_timer(sk, TIME_WRITE, sk->rto);
2034                         if (buff->next != NULL) {
2035                                 printk("tcp_close: next != NULL\n");
2036                                 skb_unlink(buff);
2037                         }
2038                         skb_queue_tail(&sk->write_queue, buff);
2039                 }
2040 
2041                 if (sk->state == TCP_CLOSE_WAIT) {
2042                         sk->state = TCP_FIN_WAIT2;
2043                 } else {
2044                         sk->state = TCP_FIN_WAIT1;
             /* The brace below closes the else above (its indentation is
                misleading); the one after it closes the switch. */
2045         }
2046   }
       /* Reached only from the FIN-sending case, and for any state not
          listed in the switch. */
2047   release_sock(sk);
2048 }
2049 
2050 
2051 /*
2052  * This routine takes stuff off of the write queue,
2053  * and puts it in the xmit queue.
2054  */
2055 static void
2056 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2057 {
2058   struct sk_buff *skb;
2059 
2060   DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));
2061 
2062   /* The bytes will have to remain here. In time closedown will
2063      empty the write queue and all will be happy */
2064   if(sk->zapped)
2065         return;
2066 
2067   while((skb = skb_peek(&sk->write_queue)) != NULL &&
2068         before(skb->h.seq, sk->window_seq + 1) &&
2069         (sk->retransmits == 0 ||
2070          sk->timeout != TIME_WRITE ||
2071          before(skb->h.seq, sk->rcv_ack_seq + 1))
2072         && sk->packets_out < sk->cong_window) {
2073                 IS_SKB(skb);
2074                 skb_unlink(skb);
2075                 DPRINTF((DBG_TCP, "Sending a packet.\n"));
2076 
2077                 /* See if we really need to send the packet. */
2078                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) {
2079                         sk->retransmits = 0;
2080                         kfree_skb(skb, FREE_WRITE);
2081                         if (!sk->dead) sk->write_space(sk);
2082                 } else {
2083                         sk->sent_seq = skb->h.seq;
2084                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2085                 }
2086         }
2087 }
2088 
2089 
2090 /*
2091  * This routine sorts the send list, and resets the
2092  * sk->send_head and sk->send_tail pointers.
2093  */
2094 void
2095 sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2096 {
2097   struct sk_buff *list = NULL;
2098   struct sk_buff *skb,*skb2,*skb3;
2099 
2100   for (skb = sk->send_head; skb != NULL; skb = skb2) {
2101         skb2 = skb->link3;
2102         if (list == NULL || before (skb2->h.seq, list->h.seq)) {
2103                 skb->link3 = list;
2104                 sk->send_tail = skb;
2105                 list = skb;
2106         } else {
2107                 for (skb3 = list; ; skb3 = skb3->link3) {
2108                         if (skb3->link3 == NULL ||
2109                             before(skb->h.seq, skb3->link3->h.seq)) {
2110                                 skb->link3 = skb3->link3;
2111                                 skb3->link3 = skb;
2112                                 if (skb->link3 == NULL) sk->send_tail = skb;
2113                                 break;
2114                         }
2115                 }
2116         }
2117   }
2118   sk->send_head = list;
2119 }
2120   
2121 
2122 /* This routine deals with incoming acks, but not outgoing ones. */
2123 static int
2124 tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2125 {
2126   unsigned long ack;
2127   int flag = 0;
2128   /* 
2129    * 1 - there was data in packet as well as ack or new data is sent or 
2130    *     in shutdown state
2131    * 2 - data from retransmit queue was acked and removed
2132    * 4 - window shrunk or data from retransmit queue was acked and removed
2133    */
2134 
2135   if(sk->zapped)
2136         return(1);      /* Dead, cant ack any more so why bother */
2137 
2138   ack = ntohl(th->ack_seq);
2139   DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
2140           "sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
2141           ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
2142 
2143   if (ntohs(th->window) > sk->max_window) {
2144         sk->max_window = ntohs(th->window);
2145         sk->mss = min(sk->max_window, sk->mtu);
2146   }
2147 
2148   if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2149         sk->retransmits = 0;
2150 
2151 /* not quite clear why the +1 and -1 here, and why not +1 in next line */
2152   if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
2153         if (after(ack, sk->sent_seq) ||
2154            (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
2155                 return(0);
2156         }
2157         if (sk->keepopen) {
2158                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2159         }
2160         return(1);
2161   }
2162 
2163   if (len != th->doff*4) flag |= 1;
2164 
2165   /* See if our window has been shrunk. */
2166   if (after(sk->window_seq, ack+ntohs(th->window))) {
2167         /*
2168          * We may need to move packets from the send queue
2169          * to the write queue, if the window has been shrunk on us.
2170          * The RFC says you are not allowed to shrink your window
2171          * like this, but if the other end does, you must be able
2172          * to deal with it.
2173          */
2174         struct sk_buff *skb;
2175         struct sk_buff *skb2;
2176         struct sk_buff *wskb = NULL;
2177   
2178         skb2 = sk->send_head;
2179         sk->send_head = NULL;
2180         sk->send_tail = NULL;
2181 
2182         flag |= 4;
2183 
2184         sk->window_seq = ack + ntohs(th->window);
2185         cli();
2186         while (skb2 != NULL) {
2187                 skb = skb2;
2188                 skb2 = skb->link3;
2189                 skb->link3 = NULL;
2190                 if (after(skb->h.seq, sk->window_seq)) {
2191                         if (sk->packets_out > 0) sk->packets_out--;
2192                         /* We may need to remove this from the dev send list. */
2193                         if (skb->next != NULL) {
2194                                 skb_unlink(skb);                                
2195                         }
2196                         /* Now add it to the write_queue. */
2197                         if (wskb == NULL)
2198                                 skb_queue_head(&sk->write_queue,skb);
2199                         else
2200                                 skb_append(wskb,skb);
2201                         wskb = skb;
2202                 } else {
2203                         if (sk->send_head == NULL) {
2204                                 sk->send_head = skb;
2205                                 sk->send_tail = skb;
2206                         } else {
2207                                 sk->send_tail->link3 = skb;
2208                                 sk->send_tail = skb;
2209                         }
2210                         skb->link3 = NULL;
2211                 }
2212         }
2213         sti();
2214   }
2215 
2216   if (sk->send_tail == NULL || sk->send_head == NULL) {
2217         sk->send_head = NULL;
2218         sk->send_tail = NULL;
2219         sk->packets_out= 0;
2220   }
2221 
2222   sk->window_seq = ack + ntohs(th->window);
2223 
2224   /* We don't want too many packets out there. */
2225   if (sk->timeout == TIME_WRITE && 
2226       sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {
2227 /* 
2228  * This is Jacobson's slow start and congestion avoidance. 
2229  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2230  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2231  * counter and increment it once every cwnd times.  It's possible
2232  * that this should be done only if sk->retransmits == 0.  I'm
2233  * interpreting "new data is acked" as including data that has
2234  * been retransmitted but is just now being acked.
2235  */
2236         if (sk->cong_window < sk->ssthresh)  
2237           /* in "safe" area, increase */
2238           sk->cong_window++;
2239         else {
2240           /* in dangerous area, increase slowly.  In theory this is
2241              sk->cong_window += 1 / sk->cong_window
2242            */
2243           if (sk->cong_count >= sk->cong_window) {
2244             sk->cong_window++;
2245             sk->cong_count = 0;
2246           } else 
2247             sk->cong_count++;
2248         }
2249   }
2250 
2251   DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
2252   sk->rcv_ack_seq = ack;
2253 
2254   /*
2255    * if this ack opens up a zero window, clear backoff.  It was
2256    * being used to time the probes, and is probably far higher than
2257    * it needs to be for normal retransmission
2258    */
2259   if (sk->timeout == TIME_PROBE0) {
2260         if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2261             ! before (sk->window_seq, sk->write_queue.next->h.seq)) {
2262           sk->retransmits = 0;
2263           sk->backoff = 0;
2264           /* recompute rto from rtt.  this eliminates any backoff */
2265           sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2266           if (sk->rto > 120*HZ)
2267             sk->rto = 120*HZ;
2268           if (sk->rto < 1*HZ)
2269             sk->rto = 1*HZ;
2270         }
2271   }
2272 
2273   /* See if we can take anything off of the retransmit queue. */
2274   while(sk->send_head != NULL) {
2275         /* Check for a bug. */
2276         if (sk->send_head->link3 &&
2277             after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
2278                 printk("INET: tcp.c: *** bug send_list out of order.\n");
2279                 sort_send(sk);
2280         }
2281 
2282         if (before(sk->send_head->h.seq, ack+1)) {
2283                 struct sk_buff *oskb;
2284 
2285                 if (sk->retransmits) {
2286 
2287                   /* we were retransmitting.  don't count this in RTT est */
2288                   flag |= 2;
2289 
2290                   /*
2291                    * even though we've gotten an ack, we're still
2292                    * retransmitting as long as we're sending from
2293                    * the retransmit queue.  Keeping retransmits non-zero
2294                    * prevents us from getting new data interspersed with
2295                    * retransmissions.
2296                    */
2297 
2298                   if (sk->send_head->link3)
2299                     sk->retransmits = 1;
2300                   else
2301                     sk->retransmits = 0;
2302 
2303                 }
2304 
2305                 /*
2306                  * Note that we only reset backoff and rto in the
2307                  * rtt recomputation code.  And that doesn't happen
2308                  * if there were retransmissions in effect.  So the
2309                  * first new packet after the retransmissions is
2310                  * sent with the backoff still in effect.  Not until
2311                  * we get an ack from a non-retransmitted packet do
2312                  * we reset the backoff and rto.  This allows us to deal
2313                  * with a situation where the network delay has increased
2314                  * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2315                  */
2316 
2317                 /* We have one less packet out there. */
2318                 if (sk->packets_out > 0) sk->packets_out --;
2319                 DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
2320                                 sk->send_head, sk->send_head->h.seq, ack));
2321 
2322                 /* Wake up the process, it can probably write more. */
2323                 if (!sk->dead) sk->write_space(sk);
2324 
2325                 oskb = sk->send_head;
2326 
2327                 if (!(flag&2)) {
2328                   long m;
2329 
2330                   /* The following amusing code comes from Jacobson's
2331                    * article in SIGCOMM '88.  Note that rtt and mdev
2332                    * are scaled versions of rtt and mean deviation.
2333                    * This is designed to be as fast as possible 
2334                    * m stands for "measurement".
2335                    */
2336 
2337                   m = jiffies - oskb->when;  /* RTT */
2338                   m -= (sk->rtt >> 3);       /* m is now error in rtt est */
2339                   sk->rtt += m;              /* rtt = 7/8 rtt + 1/8 new */
2340                   if (m < 0)
2341                     m = -m;                  /* m is now abs(error) */
2342                   m -= (sk->mdev >> 2);      /* similar update on mdev */
2343                   sk->mdev += m;             /* mdev = 3/4 mdev + 1/4 new */
2344 
2345                   /* now update timeout.  Note that this removes any backoff */
2346                   sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2347                   if (sk->rto > 120*HZ)
2348                     sk->rto = 120*HZ;
2349                   if (sk->rto < 1*HZ)
2350                     sk->rto = 1*HZ;
2351                   sk->backoff = 0;
2352 
2353                 }
2354                 flag |= (2|4);
2355 
2356                 cli();
2357 
2358                 oskb = sk->send_head;
2359                 IS_SKB(oskb);
2360                 sk->send_head = oskb->link3;
2361                 if (sk->send_head == NULL) {
2362                         sk->send_tail = NULL;
2363                 }
2364 
2365                 /* We may need to remove this from the dev send list. */
2366                 if (oskb->next)
2367                         skb_unlink(oskb);
2368                 sti();
2369                 kfree_skb(oskb, FREE_WRITE); /* write. */
2370                 if (!sk->dead) sk->write_space(sk);
2371         } else {
2372                 break;
2373         }
2374   }
2375 
2376   /*
2377    * Maybe we can take some stuff off of the write queue,
2378    * and put it onto the xmit queue.
2379    */
2380   if (skb_peek(&sk->write_queue) != NULL) {
2381         if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2382                 (sk->retransmits == 0 || 
2383                  sk->timeout != TIME_WRITE ||
2384                  before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2385                 && sk->packets_out < sk->cong_window) {
2386                 flag |= 1;
2387                 tcp_write_xmit(sk);
2388         } else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2389                    sk->send_head == NULL &&
2390                    sk->ack_backlog == 0 &&
2391                    sk->state != TCP_TIME_WAIT) {
2392                 reset_timer(sk, TIME_PROBE0, sk->rto);
2393         }               
2394   } else {
2395         if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2396             sk->state != TCP_TIME_WAIT && !sk->keepopen) {
2397                 DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n")); 
2398                 if (!sk->dead) sk->write_space(sk);
2399 
2400                 if (sk->keepopen)
2401                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2402                 else
2403                         delete_timer(sk);
2404         } else {
2405                 if (sk->state != (unsigned char) sk->keepopen) {
2406                         reset_timer(sk, TIME_WRITE, sk->rto);
2407                 }
2408                 if (sk->state == TCP_TIME_WAIT) {
2409                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2410                 }
2411         }
2412   }
2413 
2414   if (sk->packets_out == 0 && sk->partial != NULL &&
2415       skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) {
2416         flag |= 1;
2417         tcp_send_partial(sk);
2418   }
2419 
2420   /* See if we are done. */
2421   if (sk->state == TCP_TIME_WAIT) {
2422         if (!sk->dead)
2423                 sk->state_change(sk);
2424         if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) {
2425                 flag |= 1;
2426                 sk->state = TCP_CLOSE;
2427                 sk->shutdown = SHUTDOWN_MASK;
2428         }
2429   }
2430 
2431   if (sk->state == TCP_LAST_ACK || sk->state == TCP_FIN_WAIT2) {
2432         if (!sk->dead) sk->state_change(sk);
2433         if (sk->rcv_ack_seq == sk->write_seq) {
2434                 flag |= 1;
2435                 if (sk->acked_seq != sk->fin_seq) {
2436                         tcp_time_wait(sk);
2437                 } else {
2438                         DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk));
2439                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk,
2440                                      th, sk->daddr);
2441                         sk->shutdown = SHUTDOWN_MASK;
2442                         sk->state = TCP_CLOSE;
2443                 }
2444         }
2445   }
2446 
2447 /*
2448  * I make no guarantees about the first clause in the following
2449  * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2450  * what conditions "!flag" would be true.  However I think the rest
2451  * of the conditions would prevent that from causing any
2452  * unnecessary retransmission. 
2453  *   Clearly if the first packet has expired it should be 
2454  * retransmitted.  The other alternative, "flag&2 && retransmits", is
2455  * harder to explain:  You have to look carefully at how and when the
2456  * timer is set and with what timeout.  The most recent transmission always
2457  * sets the timer.  So in general if the most recent thing has timed
2458  * out, everything before it has as well.  So we want to go ahead and
2459  * retransmit some more.  If we didn't explicitly test for this
2460  * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2461  * would not be true.  If you look at the pattern of timing, you can
2462  * show that rto is increased fast enough that the next packet would
2463  * almost never be retransmitted immediately.  Then you'd end up
2464  * waiting for a timeout to send each packet on the retransmission
2465  * queue.  With my implementation of the Karn sampling algorithm,
2466  * the timeout would double each time.  The net result is that it would
2467  * take a hideous amount of time to recover from a single dropped packet.
2468  * It's possible that there should also be a test for TIME_WRITE, but
2469  * I think as long as "send_head != NULL" and "retransmit" is on, we've
2470  * got to be in real retransmission mode.
2471  *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2472  * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2473  * As long as no further losses occur, this seems reasonable.
2474  */
2475 
2476   if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2477       (((flag&2) && sk->retransmits) ||
2478        (sk->send_head->when + sk->rto < jiffies))) {
2479         ip_do_retransmit(sk, 1);
2480         reset_timer(sk, TIME_WRITE, sk->rto);
2481       }
2482 
2483   DPRINTF((DBG_TCP, "leaving tcp_ack\n"));
2484   return(1);
2485 }
2486 
2487 
2488 /*
2489  * This routine handles the data.  If there is room in the buffer,
2490  * it will have already been moved into it.  If there is no
2491  * room, then we will just have to discard the packet.
 *
 * The segment is inserted into sk->receive_queue in sequence order,
 * sk->acked_seq is advanced over every queued segment that has become
 * contiguous, and an ack is sent (and/or an ack timer started).
 *
 *   skb   - the received segment; skb->h.th is its TCP header.  It is
 *           either placed on sk->receive_queue or freed here.
 *   sk    - the socket the segment belongs to.
 *   saddr - address handed on to tcp_send_ack().
 *   len   - segment length including the TCP header; the header
 *           length (th->doff*4) is subtracted to get the payload size.
 *
 * Every return path returns 0.
2492  */
2493 static int
2494 tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
2495          unsigned long saddr, unsigned short len)
2496 {
2497   struct sk_buff *skb1, *skb2;
2498   struct tcphdr *th;
2499   int dup_dumped=0;
2500 
2501   th = skb->h.th;
2502   print_th(th);
       /* From here on skb->len is the payload length only. */
2503   skb->len = len -(th->doff*4);
2504 
2505   DPRINTF((DBG_TCP, "tcp_data len = %d sk = %X:\n", skb->len, sk));
2506 
2507   sk->bytes_rcv += skb->len;
       /* Nothing to queue: no payload and none of FIN/URG/PSH set. */
2508   if (skb->len == 0 && !th->fin && !th->urg && !th->psh) {
2509         /* Don't want to keep passing ack's back and forth. */
2510         if (!th->ack) tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
2511         kfree_skb(skb, FREE_READ);
2512         return(0);
2513   }
2514 
       /*
        * The receive side is already shut down: send a reset, mark the
        * socket closed with EPIPE and drop the segment.
        */
2515   if (sk->shutdown & RCV_SHUTDOWN) {
2516         sk->acked_seq = th->seq + skb->len + th->syn + th->fin;
2517         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2518                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2519         tcp_statistics.TcpEstabResets++;
2520         sk->state = TCP_CLOSE;
2521         sk->err = EPIPE;
2522         sk->shutdown = SHUTDOWN_MASK;
2523         DPRINTF((DBG_TCP, "tcp_data: closing socket - %X\n", sk));
2524         kfree_skb(skb, FREE_READ);
2525         if (!sk->dead) sk->state_change(sk);
2526         return(0);
2527   }
2528 
2529   /*
2530    * Now we have to walk the chain, and figure out where this one
2531    * goes into it.  This is set up so that the last packet we received
2532    * will be the first one we look at, that way if everything comes
2533    * in order, there will be no performance loss, and if they come
2534    * out of order we will be able to fit things in nicely.
        *
        * After this step skb1 is NULL when skb was queued on an empty
        * list or replaced a duplicate; otherwise it is the queue entry
        * at which the backwards search stopped.
2535    */
2536 
2537   /* This should start at the last one, and then go around forwards. */
2538   if (skb_peek(&sk->receive_queue) == NULL) {
2539         DPRINTF((DBG_TCP, "tcp_data: skb = %X:\n", skb));
2540         skb_queue_head(&sk->receive_queue,skb);
2541         skb1= NULL;
2542   } else {
2543         DPRINTF((DBG_TCP, "tcp_data adding to chain sk = %X:\n", sk));
             /* Walk from the tail towards the head; every exit queues skb. */
2544         for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) {
2545                 if(sk->debug)
2546                 {
2547                         printk("skb1=%p :", skb1);
2548                         printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
2549                         printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
2550                         printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
2551                                         sk->acked_seq);
2552                 }
                     /*
                      * Same starting sequence number and at least as long
                      * as skb1: queue skb in its place and free skb1.
                      */
2553                 if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
2554                 {
2555                         skb_append(skb1,skb);
2556                         skb_unlink(skb1);
2557                         kfree_skb(skb1,FREE_READ);
2558                         dup_dumped=1;
2559                         skb1=NULL;
2560                         break;
2561                 }
                     /*
                      * skb sorts behind skb1 (presumably th->seq >= skb1's
                      * seq; after() is defined elsewhere), so link it in
                      * directly after skb1.
                      */
2562                 if (after(th->seq+1, skb1->h.th->seq))
2563                 {
2564                         skb_append(skb1,skb);
2565                         break;
2566                 }
                     /* Reached the head without finding a slot: skb goes first. */
2567                 if (skb1 == skb_peek(&sk->receive_queue))
2568                 {
2569                         skb_queue_head(&sk->receive_queue, skb);
2570                         break;
2571                 }
2572         }
2573         DPRINTF((DBG_TCP, "skb = %X:\n", skb));
2574   }
2575 
       /*
        * The ack_seq field of the received header is overwritten with the
        * sequence number just past this segment (payload, plus one each
        * for SYN and FIN).  The acking code below relies on that value.
        */
2576   th->ack_seq = th->seq + skb->len;
2577   if (th->syn) th->ack_seq++;
2578   if (th->fin) th->ack_seq++;
2579 
2580   if (before(sk->acked_seq, sk->copied_seq)) {
2581         printk("*** tcp.c:tcp_data bug acked < copied\n");
2582         sk->acked_seq = sk->copied_seq;
2583   }
2584 
2585   /* Now figure out if we can ack anything. */
2586   if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) {
           /*
            * Only a segment that starts no later than acked_seq
            * (presumably th->seq <= sk->acked_seq) can be acknowledged.
            */
2587       if (before(th->seq, sk->acked_seq+1)) {
2588                 int newwindow;
2589 
                     /*
                      * The segment carries new data: shrink the window by
                      * the number of newly acked bytes (clamped at zero)
                      * and advance acked_seq.
                      */
2590                 if (after(th->ack_seq, sk->acked_seq)) {
2591                         newwindow = sk->window -
2592                                        (th->ack_seq - sk->acked_seq);
2593                         if (newwindow < 0)
2594                                 newwindow = 0;  
2595                         sk->window = newwindow;
2596                         sk->acked_seq = th->ack_seq;
2597                 }
2598                 skb->acked = 1;
2599 
2600                 /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */
2601                 if (skb->h.th->fin) {
2602                         if (!sk->dead) sk->state_change(sk);
2603                         sk->shutdown |= RCV_SHUTDOWN;
2604                 }
2605           
                     /*
                      * Segments queued behind skb may have become contiguous
                      * as well; ack each of them in turn and stop at the
                      * first one that still leaves a gap.
                      */
2606                 for(skb2 = skb->next;
2607                     skb2 != (struct sk_buff *)&sk->receive_queue;
2608                     skb2 = skb2->next) {
2609                         if (before(skb2->h.th->seq, sk->acked_seq+1)) {
2610                                 if (after(skb2->h.th->ack_seq, sk->acked_seq))
2611                                 {
2612                                         newwindow = sk->window -
2613                                          (skb2->h.th->ack_seq - sk->acked_seq);
2614                                         if (newwindow < 0)
2615                                                 newwindow = 0;  
2616                                         sk->window = newwindow;
2617                                         sk->acked_seq = skb2->h.th->ack_seq;
2618                                 }
2619                                 skb2->acked = 1;
2620 
2621                                 /*
2622                                  * When we ack the fin, we turn on
2623                                  * the RCV_SHUTDOWN flag.
2624                                  */
2625                                 if (skb2->h.th->fin) {
2626                                         sk->shutdown |= RCV_SHUTDOWN;
2627                                         if (!sk->dead) sk->state_change(sk);
2628                                 }
2629 
2630                                 /* Force an immediate ack. */
2631                                 sk->ack_backlog = sk->max_ack_backlog;
2632                         } else {
2633                                 break;
2634                         }
2635                 }
2636 
2637                 /*
2638                  * This also takes care of updating the window.
2639                  * This if statement needs to be simplified.
                      *
                      * The "ack now" branch is empty because its
                      * tcp_send_ack() call is commented out; only the else
                      * branch (count the ack and arm the ack timer) has any
                      * effect.  An ack is sent unconditionally further down.
2640                  */
2641                 if (!sk->delay_acks ||
2642                     sk->ack_backlog >= sk->max_ack_backlog || 
2643                     sk->bytes_rcv > sk->max_unacked || th->fin) {
2644 /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
2645                 } else {
2646                         sk->ack_backlog++;
2647                         if(sk->debug)
2648                                 printk("Ack queued.\n");
2649                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2650                 }
2651         }
2652   }
2653 
2654   /*
2655    * If we've missed a packet, send an ack.
2656    * Also start a timer to send another.
2657    */
2658   if (!skb->acked) {
2659         /*
2660          * This is important.  If we don't have much room left,
2661          * we need to throw out a few packets so we have a good
2662          * window.  Note that mtu is used, not mss, because mss is really
2663          * for the send side.  He could be sending us stuff as large as mtu.
              *
              * NOTE(review): skb itself is unacked and on this queue, and
              * nothing below stops skb1 from being skb.  th points into
              * skb and is still used by tcp_send_ack() afterwards --
              * confirm this cannot touch freed memory.
2664          */
2665         while (sk->prot->rspace(sk) < sk->mtu) {
2666                 skb1 = skb_peek(&sk->receive_queue);
2667                 if (skb1 == NULL) {
2668                         printk("INET: tcp.c:tcp_data memory leak detected.\n");
2669                         break;
2670                 }
2671 
2672                 /* Don't throw out something that has been acked. */
2673                 if (skb1->acked) {
2674                         break;
2675                 }
2676                 
2677                 skb_unlink(skb1);
2678                 kfree_skb(skb1, FREE_READ);
2679         }
2680         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
2681         sk->ack_backlog++;
2682         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
2683   } else {
2684         /* skb was marked acked above; send the ack for it right away. */
2685         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
2686   }
2687 
2688   /* Now tell the user we may have some data. */
2689   if (!sk->dead) {
2690         if(sk->debug)
2691                 printk("Data wakeup.\n");
2692         sk->data_ready(sk,0);
2693   } else {
2694         DPRINTF((DBG_TCP, "data received on dead socket.\n"));
2695   }
2696 
       /*
        * In FIN_WAIT2, once acked_seq has reached fin_seq and rcv_ack_seq
        * equals write_seq, shut down both directions and move to LAST_ACK.
        */
2697   if (sk->state == TCP_FIN_WAIT2 &&
2698       sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->write_seq) {
2699         DPRINTF((DBG_TCP, "tcp_data: entering last_ack state sk = %X\n", sk));
2700 
2701 /*      tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); */
2702         sk->shutdown = SHUTDOWN_MASK;
2703         sk->state = TCP_LAST_ACK;
2704         if (!sk->dead) sk->state_change(sk);
2705   }
2706 
2707   return(0);
2708 }
2709 
2710 
/*
 * Record a newly advertised urgent pointer for sk.
 *
 * The header's urg_ptr (converted with ntohs and reduced by one when
 * non-zero) is added to th->seq to form an absolute sequence number.
 * Nothing is changed when that position fails the copied_seq test or
 * when urgent data is already pending and the new pointer is not after
 * sk->urg_seq.  Otherwise SIGURG is sent to sk->proc and the socket is
 * left waiting for the urgent byte (urg_data = URG_NOTYET, urg_seq =
 * the new position).
 */
2711 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
2712 {
2713         unsigned long ptr = ntohs(th->urg_ptr);
2714 
             /* Turn the header-relative pointer into an absolute sequence number. */
2715         if (ptr)
2716                 ptr--;
2717         ptr += th->seq;
2718 
2719         /* ignore urgent data that we've already seen and read */
2720         if (after(sk->copied_seq+1, ptr))
2721                 return;
2722 
2723         /* do we already have a newer (or duplicate) urgent pointer? */
2724         if (sk->urg_data && !after(ptr, sk->urg_seq))
2725                 return;
2726 
2727         /* tell the world about our new urgent pointer */
             /* sk->proc > 0 names a single process, < 0 a process group; 0 means no signal. */
2728         if (sk->proc != 0) {
2729                 if (sk->proc > 0) {
2730                         kill_proc(sk->proc, SIGURG, 1);
2731                 } else {
2732                         kill_pg(-sk->proc, SIGURG, 1);
2733                 }
2734         }
2735         sk->urg_data = URG_NOTYET;
2736         sk->urg_seq = ptr;
2737 }
2738 
/*
 * Urgent-data processing for one incoming segment.
 *
 * If th has URG set, tcp_check_urg() is called first.  Then, if the
 * socket is waiting for an urgent byte (urg_data == URG_NOTYET) and
 * sk->urg_seq lies inside this segment, that byte is stored in
 * sk->urg_data together with URG_VALID and, unless the socket is dead,
 * data_ready() is called.
 *
 * len is compared against an offset counted from the start of the TCP
 * header, so it presumably covers header plus payload -- confirm against
 * the caller.  saddr is not used here.  Always returns 0.
 */
2739 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
2740         unsigned long saddr, unsigned long len)
2741 {
2742         unsigned long ptr;
2743 
2744         /* check if we get a new urgent pointer */
2745         if (th->urg)
2746                 tcp_check_urg(sk,th);
2747 
2748         /* do we wait for any urgent data? */
2749         if (sk->urg_data != URG_NOTYET)
2750                 return 0;
2751 
2752         /* is the urgent pointer pointing into this packet? */
             /* ptr becomes a byte offset from th: sequence distance plus header length. */
2753         ptr = sk->urg_seq - th->seq + th->doff*4;
2754         if (ptr >= len)
2755                 return 0;
2756 
2757         /* ok, got the correct packet, update info */
2758         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
2759         if (!sk->dead)
2760                 sk->data_ready(sk,0);
2761         return 0;
2762 }
2763 
2764 
2765 /* This deals with incoming fins. 'Linus at 9 O'clock' 8-) */
/*
 * Advances the socket state for a received FIN and records in
 * sk->fin_seq the sequence number that has to be acked (th->seq+1).
 * state_change() is called first unless the socket is dead.
 *
 *   SYN_RECV, SYN_SENT, ESTABLISHED -> CLOSE_WAIT (TIME_CLOSE timer armed)
 *   FIN_WAIT1                       -> FIN_WAIT2
 *   CLOSE_WAIT, FIN_WAIT2           -> unchanged (retransmitted FIN)
 *   TIME_WAIT and any other state   -> LAST_ACK (TIME_CLOSE timer armed)
 *
 * sk->ack_backlog is incremented on every path except the last one.
 * saddr and dev are only used in the debug print.  Always returns 0.
 */
2766 static int
2767 tcp_fin(struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
2768          unsigned long saddr, struct device *dev)
2769 {
2770   DPRINTF((DBG_TCP, "tcp_fin(sk=%X, th=%X, saddr=%X, dev=%X)\n",
2771                                                 sk, th, saddr, dev));
2772   
2773   if (!sk->dead) {
2774         sk->state_change(sk);
2775   }
2776 
2777   switch(sk->state) {
2778         case TCP_SYN_RECV:
2779         case TCP_SYN_SENT:
2780         case TCP_ESTABLISHED:
2781                 /* Contains the one that needs to be acked */
2782                 reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
2783                 sk->fin_seq = th->seq+1;
2784                 tcp_statistics.TcpCurrEstab--;
2785                 sk->state = TCP_CLOSE_WAIT;
                     /* A FIN that also carries RST shuts down both directions. */
2786                 if (th->rst) sk->shutdown = SHUTDOWN_MASK;
2787                 break;
2788 
2789         case TCP_CLOSE_WAIT:
2790         case TCP_FIN_WAIT2:
2791                 break; /* we got a retransmit of the fin. */
2792 
2793         case TCP_FIN_WAIT1:
2794                 /* Contains the one that needs to be acked */
2795                 sk->fin_seq = th->seq+1;
2796                 sk->state = TCP_FIN_WAIT2;
2797                 break;
2798 
             /* TIME_WAIT shares the default handling; this path returns without bumping ack_backlog. */
2799         default:
2800         case TCP_TIME_WAIT:
2801                 sk->state = TCP_LAST_ACK;
2802 
2803                 /* Start the timers. */
2804                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2805                 return(0);
2806   }
2807   sk->ack_backlog++;
2808 
2809   return(0);
2810 }
2811 
2812 
2813 /* This will accept the next outstanding connection. */
/*
 * Takes the next buffer off the listening socket's receive_queue and
 * returns the socket attached to it (skb->sk).  The buffer is freed
 * and sk->ack_backlog is decremented.
 *
 * Returns NULL with sk->err set when no connection is returned:
 *   EINVAL       - sk is not in TCP_LISTEN state
 *   EAGAIN       - queue empty and O_NONBLOCK set in flags
 *   ERESTARTSYS  - an unblocked signal is pending after sleeping
 *
 * Otherwise the caller sleeps on sk->sleep until a buffer is queued.
 */
2814 static struct sock *
2815 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
2816 {
2817   struct sock *newsk;
2818   struct sk_buff *skb;
2819   
2820   DPRINTF((DBG_TCP, "tcp_accept(sk=%X, flags=%X, addr=%s)\n",
2821                                 sk, flags, in_ntoa(sk->saddr)));
2822 
2823   /*
2824    * We need to make sure that this socket is listening,
2825    * and that it has something pending.
2826    */
2827   if (sk->state != TCP_LISTEN) {
2828         sk->err = EINVAL;
2829         return(NULL); 
2830   }
2831 
2832   /* avoid the race. */
       /* cli() is called before the queue is examined; each exit from the loop calls sti(). */
2833   cli();
2834   sk->inuse = 1;
2835   while((skb = skb_dequeue(&sk->receive_queue)) == NULL) {
2836         if (flags & O_NONBLOCK) {
2837                 sti();
2838                 release_sock(sk);
2839                 sk->err = EAGAIN;
2840                 return(NULL);
2841         }
2842 
             /* Give the socket up while sleeping; it is marked in use again after waking. */
2843         release_sock(sk);
2844         interruptible_sleep_on(sk->sleep);
2845         if (current->signal & ~current->blocked) {
2846                 sti();
2847                 sk->err = ERESTARTSYS;
2848                 return(NULL);
2849         }
2850         sk->inuse = 1;
2851   }
2852   sti();
2853 
2854   /* Now all we need to do is return skb->sk. */
2855   newsk = skb->sk;
2856 
2857   kfree_skb(skb, FREE_READ);
2858   sk->ack_backlog--;
2859   release_sock(sk);
2860   return(newsk);
2861 }
2862 
2863 
2864 /* This will initiate an outgoing connection. */
/*
 * Copies the destination from user space, initialises the socket's
 * sequence state, builds a SYN segment carrying a 4-byte option with
 * sk->mtu, moves the socket to TCP_SYN_SENT, arms the TIME_WRITE timer
 * and queues the segment for transmission.
 *
 *   sk       - socket to connect; must be in TCP_CLOSE state.
 *   usin     - user-space pointer to the destination address.
 *   addr_len - length of *usin; must be at least 8.
 *
 * Returns 0 once the SYN has been queued, otherwise:
 *   -EISCONN       sk is not in TCP_CLOSE state
 *   -EINVAL        addr_len < 8
 *   (verify_area)  its error code when usin is not readable
 *   -EAFNOSUPPORT  sin_family is non-zero and not AF_INET
 *   -ENETUNREACH   destination is a broadcast address, or
 *                  build_header() failed
 *   -EBUSY         destination is this socket's own address and port
 *   -ENOMEM        no buffer available for the SYN
 */
2865 static int
2866 tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
2867 {
2868   struct sk_buff *buff;
2869   struct sockaddr_in sin;
2870   struct device *dev=NULL;
2871   unsigned char *ptr;
2872   int tmp;
2873   struct tcphdr *t1;
2874   int err;
2875 
2876   if (sk->state != TCP_CLOSE) return(-EISCONN);
2877   if (addr_len < 8) return(-EINVAL);
2878 
2879   err=verify_area(VERIFY_READ, usin, addr_len);
2880   if(err)
2881         return err;
2882         
       /* Copy no more than sizeof(sin), even if the caller passed a larger addr_len. */
2883   memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len));
2884 
2885   if (sin.sin_family && sin.sin_family != AF_INET) return(-EAFNOSUPPORT);
2886 
2887   DPRINTF((DBG_TCP, "TCP connect daddr=%s\n", in_ntoa(sin.sin_addr.s_addr)));
2888   
2889   /* Don't want a TCP connection going to a broadcast address */
2890   if (ip_chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) { 
2891         DPRINTF((DBG_TCP, "TCP connection to broadcast address not allowed\n"));
2892         return(-ENETUNREACH);
2893   }
2894   
2895   /* Connect back to the same socket: Blows up so disallow it */
2896   if(sk->saddr == sin.sin_addr.s_addr && sk->num==ntohs(sin.sin_port))
2897         return -EBUSY;
2898 
       /* Initialise the send-side sequence state while the socket is marked in use. */
2899   sk->inuse = 1;
2900   sk->daddr = sin.sin_addr.s_addr;
2901   sk->write_seq = jiffies * SEQ_TICK - seq_offset;
2902   sk->window_seq = sk->write_seq;
2903   sk->rcv_ack_seq = sk->write_seq -1;
2904   sk->err = 0;
2905   sk->dummy_th.dest = sin.sin_port;
2906   release_sock(sk);
2907 
       /*
        * The socket is released across the allocation (made with
        * GFP_KERNEL) and marked in use again afterwards.
        */
2908   buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
2909   if (buff == NULL) {
2910         return(-ENOMEM);
2911   }
2912   sk->inuse = 1;
       /* 24 bytes: matches doff = 6 words below and the checksum length sizeof(struct tcphdr) + 4. */
2913   buff->len = 24;
2914   buff->sk = sk;
2915   buff->free = 1;
2916   t1 = (struct tcphdr *) buff->data;
2917 
2918   /* Put in the IP header and routing stuff. */
2919   /* We need to build the routing stuff from the things saved in skb. */
       /* build_header() returns a negative value on failure, otherwise the size it added; it is handed &dev. */
2920   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
2921                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2922   if (tmp < 0) {
2923         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
2924         release_sock(sk);
2925         return(-ENETUNREACH);
2926   }
2927   buff->len += tmp;
       /* The TCP header starts tmp bytes into the buffer. */
2928   t1 = (struct tcphdr *)((char *)t1 +tmp);
2929 
       /* Start from the socket's template header, then set the SYN-specific fields. */
2930   memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
       /*
        * The SYN consumes one sequence number (write_seq is post-incremented).
        * NOTE(review): ntohl() is applied to a host-order value here; htonl()
        * would be the conventional spelling for this direction.
        */
2931   t1->seq = ntohl(sk->write_seq++);
2932   sk->sent_seq = sk->write_seq;
2933   buff->h.seq = sk->write_seq;
2934   t1->ack = 0;
       /* NOTE(review): window is stored without a byte-order conversion, unlike seq -- confirm intended. */
2935   t1->window = 2;
2936   t1->res1=0;
2937   t1->res2=0;
2938   t1->rst = 0;
2939   t1->urg = 0;
2940   t1->psh = 0;
2941   t1->syn = 1;
2942   t1->urg_ptr = 0;
       /* Header length in 32-bit words: 6 * 4 = 24 bytes, i.e. the header plus the 4 option bytes. */
2943   t1->doff = 6;
2944 
2945 /* use 512 or whatever user asked for */
     /*
      * Without a user-supplied MSS: 576 - HEADER_SIZE when source and
      * destination differ under the mask tested below, MAX_WINDOW otherwise.
      */
2946   if (sk->user_mss)
2947     sk->mtu = sk->user_mss;
2948   else {
2949 #ifdef SUBNETSARELOCAL
2950     if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
2951 #else
2952     if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
2953 #endif
2954       sk->mtu = 576 - HEADER_SIZE;
2955     else
2956       sk->mtu = MAX_WINDOW;
2957   }
2958 /* but not bigger than device MTU */
2959   sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
2960 
2961   /* Put in the TCP options to say MTU. */
       /*
        * Option bytes placed right after the fixed header: kind 2, length 4
        * (the maximum-segment-size option of RFC 793), then sk->mtu as two
        * bytes, high byte first.
        */
2962   ptr = (unsigned char *)(t1+1);
2963   ptr[0] = 2;
2964   ptr[1] = 4;
2965   ptr[2] = (sk->mtu) >> 8;
2966   ptr[3] = (sk->mtu) & 0xff;
2967   tcp_send_check(t1, sk->saddr, sk->daddr,
2968                   sizeof(struct tcphdr) + 4, sk);
2969 
2970   /* This must go first otherwise a really quick response will get reset. */
2971   sk->state = TCP_SYN_SENT;
2972   sk->rtt = TCP_CONNECT_TIME;
2973   reset_timer(sk, TIME_WRITE, TCP_CONNECT_TIME);        /* Timer for repeating the SYN until an answer */
2974   sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
2975 
2976   sk->prot->queue_xmit(sk, dev, buff, 0);  
2977   tcp_statistics.TcpActiveOpens++;
2978   tcp_statistics.TcpOutSegs++;
2979   
2980   release_sock(sk);
2981   return(0);
2982 }
2983 
2984 
2985 /* This function checks to see if the tcp header is actually acceptable. */
/*
 * Sequence-number acceptance test for an incoming segment.
 *
 *   len   - segment length including the TCP header (4*th->doff is
 *           subtracted to get the payload size).
 *   opt   - not used in this function.
 *   saddr - address handed on to tcp_send_ack().
 *   dev   - device handed on to tcp_reset().
 *
 * Returns 1 when at least part of the segment lies in the receive
 * window.  For a rejected segment:
 *   - RST set:                      returns 0, nothing is sent;
 *   - state SYN_SENT or SYN_RECV:   a reset is sent and 1 is returned;
 *   - otherwise:                    an ack is sent and 0 is returned.
 */
2986 static int
2987 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
2988              struct options *opt, unsigned long saddr, struct device *dev)
2989 {
2990         unsigned long next_seq;
2991 
             /* Start with the sequence space the segment occupies: payload bytes, plus one for FIN. */
2992         next_seq = len - 4*th->doff;
2993         if (th->fin)
2994                 next_seq++;
2995         /* if we have a zero window, we can't have any data in the packet.. */
2996         if (next_seq && !sk->window)
2997                 goto ignore_it;
             /* From here on next_seq is th->seq plus the space computed above. */
2998         next_seq += th->seq;
2999 
3000         /*
3001          * This isn't quite right.  sk->acked_seq could be more recent
3002          * than sk->window.  This is however close enough.  We will accept
3003          * slightly more packets than we should, but it should not cause
3004          * problems unless someone is trying to forge packets.
3005          */
3006 
3007         /* have we already seen all of this packet? */
3008         if (!after(next_seq+1, sk->acked_seq))
3009                 goto ignore_it;
3010         /* or does it start beyond the window? */
3011         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3012                 goto ignore_it;
3013 
3014         /* ok, at least part of this packet would seem interesting.. */
3015         return 1;
3016 
3017 ignore_it:
3018         DPRINTF((DBG_TCP, "tcp_sequence: rejecting packet.\n"));
3019 
             /* A rejected segment with RST set gets no reply. */
3020         if (th->rst)
3021                 return 0;
3022 
3023         /*
3024          *      Send a reset if we get something not ours and we are
3025          *      unsynchronized. Note: We don't do anything to our end. We
3026          *      are just killing the bogus remote connection then we will
3027          *      connect again and it will work (with luck).
3028          */
3029          
3030         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3031                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3032                 return 1;
3033         }
3034 
3035         /* Try to resync things. */
3036         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3037         return 0;
3038 }
3039 
3040 
3041 int
3042 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3043         unsigned long daddr, unsigned short len,
3044         unsigned long saddr, int redo, struct inet_protocol * protocol)
3045 {
3046   struct tcphdr *th;
3047   struct sock *sk;
3048 
3049   if (!skb) {
3050         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL\n"));
3051         return(0);
3052   }
3053 
3054   if (!dev) 
3055   {
3056         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL\n"));
3057         return(0);
3058   }
3059   
3060   tcp_statistics.TcpInSegs++;
3061   
3062   th = skb->h.th;
3063 
3064   /* Find the socket. */
3065   sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3066   DPRINTF((DBG_TCP, "<<\n"));
3067   DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X\n", len, redo, skb));
3068   
3069   /* If this socket has got a reset it's to all intents and purposes 
3070      really dead */
3071   if (sk!=NULL && sk->zapped)
3072         sk=NULL;
3073 
3074   if (sk) {
3075          DPRINTF((DBG_TCP, "sk = %X:\n", sk));
3076   }
3077 
3078   if (!redo) {
3079         if (tcp_check(th, len, saddr, daddr )) {
3080                 skb->sk = NULL;
3081                 DPRINTF((DBG_TCP, "packet dropped with bad checksum.\n"));
3082 if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: bad checksum\n");
3083                 kfree_skb(skb,FREE_READ);
3084                 /*
3085                  * We don't release the socket because it was
3086                  * never marked in use.
3087                  */
3088                 return(0);
3089         }
3090 
3091         th->seq = ntohl(th->seq);
3092 
3093         /* See if we know about the socket. */
3094         if (sk == NULL) {
3095                 if (!th->rst)
3096                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3097                 skb->sk = NULL;
3098                 kfree_skb(skb, FREE_READ);
3099                 return(0);
3100         }
3101 
3102         skb->len = len;
3103         skb->sk = sk;
3104         skb->acked = 0;
3105         skb->used = 0;
3106         skb->free = 0;
3107         skb->saddr = daddr;
3108         skb->daddr = saddr;
3109 
3110         /* We may need to add it to the backlog here. */
3111         cli();
3112         if (sk->inuse) {
3113                 skb_queue_head(&sk->back_log, skb);
3114                 sti();
3115                 return(0);
3116         }
3117         sk->inuse = 1;
3118         sti();
3119   } else {
3120         if (!sk) {
3121                 DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1\n"));
3122                 return(0);
3123         }
3124   }
3125 
3126   if (!sk->prot) {
3127         DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL \n"));
3128         return(0);
3129   }
3130 
3131   /* Charge the memory to the socket. */
3132   if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
3133         skb->sk = NULL;
3134         DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.\n"));
3135         kfree_skb(skb, FREE_READ);
3136         release_sock(sk);
3137         return(0);
3138   }
3139   sk->rmem_alloc += skb->mem_len;
3140 
3141   DPRINTF((DBG_TCP, "About to do switch.\n"));
3142 
3143   /* Now deal with it. */
3144   switch(sk->state) {
3145         /*
3146          * This should close the system down if it's waiting
3147          * for an ack that is never going to be sent.
3148          */
3149         case TCP_LAST_ACK:
3150                 if (th->rst) {
3151                         sk->zapped=1;
3152                         sk->err = ECONNRESET;
3153                         sk->state = TCP_CLOSE;
3154                         sk->shutdown = SHUTDOWN_MASK;
3155                         if (!sk->dead) {
3156                                 sk->state_change(sk);
3157                         }
3158                         kfree_skb(skb, FREE_READ);
3159                         release_sock(sk);
3160                         return(0);
3161                 }
3162 
3163         case TCP_ESTABLISHED:
3164         case TCP_CLOSE_WAIT:
3165         case TCP_FIN_WAIT1:
3166         case TCP_FIN_WAIT2:
3167         case TCP_TIME_WAIT:
3168                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
3169                         if (inet_debug == DBG_SLIP) 
3170                                 printk("\rtcp_rcv: not in seq\n");
3171                         kfree_skb(skb, FREE_READ);
3172                         release_sock(sk);
3173                         return(0);
3174                 }
3175 
3176                 if (th->rst) 
3177                 {
3178                         tcp_statistics.TcpEstabResets++;
3179                         tcp_statistics.TcpCurrEstab--;
3180                         sk->zapped=1;
3181                         /* This means the thing should really be closed. */
3182                         sk->err = ECONNRESET;
3183 
3184                         if (sk->state == TCP_CLOSE_WAIT) 
3185                         {
3186                                 sk->err = EPIPE;
3187                         }
3188 
3189                         /*
3190                          * A reset with a fin just means that
3191                          * the data was not all read.
3192                          */
3193                         sk->state = TCP_CLOSE;
3194                         sk->shutdown = SHUTDOWN_MASK;
3195                         if (!sk->dead) 
3196                         {
3197                                 sk->state_change(sk);
3198                         }
3199                         kfree_skb(skb, FREE_READ);
3200                         release_sock(sk);
3201                         return(0);
3202                 }
3203                 if (th->syn) 
3204                 {
3205                         tcp_statistics.TcpCurrEstab--;
3206                         tcp_statistics.TcpEstabResets++;
3207                         sk->err = ECONNRESET;
3208                         sk->state = TCP_CLOSE;
3209                         sk->shutdown = SHUTDOWN_MASK;
3210                         tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3211                         if (!sk->dead) {
3212                                 sk->state_change(sk);
3213                         }
3214                         kfree_skb(skb, FREE_READ);
3215                         release_sock(sk);
3216                         return(0);
3217                 }
3218 
3219                 if (th->ack && !tcp_ack(sk, th, saddr, len)) {
3220                         kfree_skb(skb, FREE_READ);
3221                         release_sock(sk);
3222                         return(0);
3223                 }
3224 
3225                 if (tcp_urg(sk, th, saddr, len)) {
3226                         kfree_skb(skb, FREE_READ);
3227                         release_sock(sk);
3228                         return(0);
3229                 }
3230 
3231                 if (tcp_data(skb, sk, saddr, len)) {
3232                         kfree_skb(skb, FREE_READ);
3233                         release_sock(sk);
3234                         return(0);
3235                 }
3236 
3237                 /* Moved: you must do data then fin bit */
3238                 if (th->fin && tcp_fin(sk, th, saddr, dev)) {
3239                         kfree_skb(skb, FREE_READ);
3240                         release_sock(sk);
3241                         return(0);
3242                 }
3243 
3244                 release_sock(sk);
3245                 return(0);
3246 
3247         case TCP_CLOSE:
3248                 if (sk->dead || sk->daddr) {
3249                         DPRINTF((DBG_TCP, "packet received for closed,dead socket\n"));
3250                         kfree_skb(skb, FREE_READ);
3251                         release_sock(sk);
3252                         return(0);
3253                 }
3254 
3255                 if (!th->rst) {
3256                         if (!th->ack)
3257                                 th->ack_seq = 0;
3258                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3259                 }
3260                 kfree_skb(skb, FREE_READ);
3261                 release_sock(sk);
3262                 return(0);
3263 
3264         case TCP_LISTEN:
3265                 if (th->rst) {
3266                         kfree_skb(skb, FREE_READ);
3267                         release_sock(sk);
3268                         return(0);
3269                 }
3270                 if (th->ack) {
3271                         tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3272                         kfree_skb(skb, FREE_READ);
3273                         release_sock(sk);
3274                         return(0);
3275                 }
3276 
3277                 if (th->syn) 
3278                 {
3279                         /*
3280                          * Now we just put the whole thing including
3281                          * the header and saddr, and protocol pointer
3282                          * into the buffer.  We can't respond until the
3283                          * user tells us to accept the connection.
3284                          */
3285                         tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
3286                         release_sock(sk);
3287                         return(0);
3288                 }
3289 
3290                 kfree_skb(skb, FREE_READ);
3291                 release_sock(sk);
3292                 return(0);
3293 
3294         case TCP_SYN_RECV:
3295                 if (th->syn) {
3296                         /* Probably a retransmitted syn */
3297                         kfree_skb(skb, FREE_READ);
3298                         release_sock(sk);
3299                         return(0);
3300                 }
3301 
3302 
3303         default:
3304                 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3305                 {
3306                         kfree_skb(skb, FREE_READ);
3307                         release_sock(sk);
3308                         return(0);
3309                 }
3310 
3311         case TCP_SYN_SENT:
3312                 if (th->rst) 
3313                 {
3314                         tcp_statistics.TcpAttemptFails++;
3315                         sk->err = ECONNREFUSED;
3316                         sk->state = TCP_CLOSE;
3317                         sk->shutdown = SHUTDOWN_MASK;
3318                         sk->zapped = 1;
3319                         if (!sk->dead) 
3320                         {
3321                                 sk->state_change(sk);
3322                         }
3323                         kfree_skb(skb, FREE_READ);
3324                         release_sock(sk);
3325                         return(0);
3326                 }
3327                 if (!th->ack) 
3328                 {
3329                         if (th->syn) 
3330                         {
3331                                 sk->state = TCP_SYN_RECV;
3332                         }
3333 
3334                         kfree_skb(skb, FREE_READ);
3335                         release_sock(sk);
3336                         return(0);
3337                 }
3338 
3339                 switch(sk->state) 
3340                 {
3341                         case TCP_SYN_SENT:
3342                                 if (!tcp_ack(sk, th, saddr, len)) 
3343                                 {
3344                                         tcp_statistics.TcpAttemptFails++;
3345                                         tcp_reset(daddr, saddr, th,
3346                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3347                                         kfree_skb(skb, FREE_READ);
3348                                         release_sock(sk);
3349                                         return(0);
3350                                 }
3351 
3352                                 /*
3353                                  * If the syn bit is also set, switch to
3354                                  * tcp_syn_recv, and then to established.
3355                                  */
3356                                 if (!th->syn) 
3357                                 {
3358                                         kfree_skb(skb, FREE_READ);
3359                                         release_sock(sk);
3360                                         return(0);
3361                                 }
3362 
3363                                 /* Ack the syn and fall through. */
3364                                 sk->acked_seq = th->seq+1;
3365                                 sk->fin_seq = th->seq;
3366                                 tcp_send_ack(sk->sent_seq, th->seq+1,
3367                                                         sk, th, sk->daddr);
3368         
3369                         case TCP_SYN_RECV:
3370                                 if (!tcp_ack(sk, th, saddr, len)) 
3371                                 {
3372                                         tcp_statistics.TcpAttemptFails++;
3373                                         tcp_reset(daddr, saddr, th,
3374                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
3375                                         kfree_skb(skb, FREE_READ);
3376                                         release_sock(sk);
3377                                         return(0);
3378                                 }
3379 
3380                                 tcp_statistics.TcpCurrEstab++;
3381                                 sk->state = TCP_ESTABLISHED;
3382 
3383                                 /*
3384                                  * Now we need to finish filling out
3385                                  * some of the tcp header.
3386                                  */
3387                                 /* We need to check for mtu info. */
3388                                 tcp_options(sk, th);
3389                                 sk->dummy_th.dest = th->source;
3390                                 sk->copied_seq = sk->acked_seq-1;
3391                                 if (!sk->dead) {
3392                                         sk->state_change(sk);
3393                                 }
3394 
3395                                 /*
3396                                  * We've already processed his first
3397                                  * ack.  In just about all cases that
3398                                  * will have set max_window.  This is
3399                                  * to protect us against the possibility
3400                                  * that the initial window he sent was 0.
3401                                  * This must occur after tcp_options, which
3402                                  * sets sk->mtu.
3403                                  */
3404                                 if (sk->max_window == 0) {
3405                                   sk->max_window = 32;
3406                                   sk->mss = min(sk->max_window, sk->mtu);
3407                                 }
3408 
3409                                 /*
3410                                  * Now process the rest like we were
3411                                  * already in the established state.
3412                                  */
3413                                 if (th->urg) {
3414                                         if (tcp_urg(sk, th, saddr, len)) { 
3415                                                 kfree_skb(skb, FREE_READ);
3416                                                 release_sock(sk);
3417                                                 return(0);
3418                                         }
3419                         }
3420                         if (tcp_data(skb, sk, saddr, len))
3421                                                 kfree_skb(skb, FREE_READ);
3422 
3423                         if (th->fin) tcp_fin(sk, th, saddr, dev);
3424                         release_sock(sk);
3425                         return(0);
3426                 }
3427 
3428                 if (th->urg) {
3429                         if (tcp_urg(sk, th, saddr, len)) {
3430                                 kfree_skb(skb, FREE_READ);
3431                                 release_sock(sk);
3432                                 return(0);
3433                         }
3434                 }
3435 
3436                 if (tcp_data(skb, sk, saddr, len)) {
3437                         kfree_skb(skb, FREE_READ);
3438                         release_sock(sk);
3439                         return(0);
3440                 }
3441 
3442                 if (!th->fin) {
3443                         release_sock(sk);
3444                         return(0);
3445                 }
3446                 tcp_fin(sk, th, saddr, dev);
3447                 release_sock(sk);
3448                 return(0);
3449         }
3450 }
3451 
3452 
3453 /*
3454   * This routine sends a packet with an out of date sequence
3455   * number. It assumes the other end will try to ack it.
3456   */
3457 static void
3458 tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3459 {
3460   struct sk_buff *buff;
3461   struct tcphdr *t1;
3462   struct device *dev=NULL;
3463   int tmp;
3464 
3465   if (sk->zapped)
3466         return; /* Afer a valid reset we can send no more */
3467 
3468   if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT &&
3469       sk -> state != TCP_FIN_WAIT1 && sk->state != TCP_FIN_WAIT2)
3470         return;
3471 
3472   buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
3473   if (buff == NULL) return;
3474 
3475   buff->len = sizeof(struct tcphdr);
3476   buff->free = 1;
3477   buff->sk = sk;
3478   DPRINTF((DBG_TCP, "in tcp_write_wakeup\n"));
3479   t1 = (struct tcphdr *) buff->data;
3480 
3481   /* Put in the IP header and routing stuff. */
3482   tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3483                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
3484   if (tmp < 0) {
3485         sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3486         return;
3487   }
3488 
3489   buff->len += tmp;
3490   t1 = (struct tcphdr *)((char *)t1 +tmp);
3491 
3492   memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
3493 
3494   /*
3495    * Use a previous sequence.
3496    * This should cause the other end to send an ack.
3497    */
3498   t1->seq = htonl(sk->sent_seq-1);
3499   t1->ack = 1; 
3500   t1->res1= 0;
3501   t1->res2= 0;
3502   t1->rst = 0;
3503   t1->urg = 0;
3504   t1->psh = 0;
3505   t1->fin = 0;
3506   t1->syn = 0;
3507   t1->ack_seq = ntohl(sk->acked_seq);
3508   t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
3509   t1->doff = sizeof(*t1)/4;
3510   tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
3511 
3512   /* Send it and free it.
3513    * This will prevent the timer from automatically being restarted.
3514   */
3515   sk->prot->queue_xmit(sk, dev, buff, 1);
3516   tcp_statistics.TcpOutSegs++;
3517 }
3518 
3519 void
3520 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3521 {
3522         if (sk->zapped)
3523                 return;         /* Afer a valid reset we can send no more */
3524 
3525         tcp_write_wakeup(sk);
3526 
3527         sk->backoff++;
3528         sk->rto = min(sk->rto << 1, 120*HZ);
3529         reset_timer (sk, TIME_PROBE0, sk->rto);
3530         sk->retransmits++;
3531         sk->prot->retransmits ++;
3532 }
3533 
3534 /*
3535  *      Socket option code for TCP. 
3536  */
3537   
3538 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3539 {
3540         int val,err;
3541 
3542         if(level!=SOL_TCP)
3543                 return ip_setsockopt(sk,level,optname,optval,optlen);
3544 
3545         if (optval == NULL) 
3546                 return(-EINVAL);
3547 
3548         err=verify_area(VERIFY_READ, optval, sizeof(int));
3549         if(err)
3550                 return err;
3551         
3552         val = get_fs_long((unsigned long *)optval);
3553 
3554         switch(optname)
3555         {
3556                 case TCP_MAXSEG:
3557 /*                      if(val<200||val>2048 || val>sk->mtu) */
3558 /*
3559  * values greater than interface MTU won't take effect.  however at
3560  * the point when this call is done we typically don't yet know
3561  * which interface is going to be used
3562  */
3563                         if(val<1||val>MAX_WINDOW)
3564                                 return -EINVAL;
3565                         sk->user_mss=val;
3566                         return 0;
3567                 case TCP_NODELAY:
3568                         sk->nonagle=(val==0)?0:1;
3569                         return 0;
3570                 default:
3571                         return(-ENOPROTOOPT);
3572         }
3573 }
3574 
3575 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
3576 {
3577         int val,err;
3578 
3579         if(level!=SOL_TCP)
3580                 return ip_getsockopt(sk,level,optname,optval,optlen);
3581                         
3582         switch(optname)
3583         {
3584                 case TCP_MAXSEG:
3585                         val=sk->user_mss;
3586                         break;
3587                 case TCP_NODELAY:
3588                         val=sk->nonagle;        /* Until Johannes stuff is in */
3589                         break;
3590                 default:
3591                         return(-ENOPROTOOPT);
3592         }
3593         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3594         if(err)
3595                 return err;
3596         put_fs_long(sizeof(int),(unsigned long *) optlen);
3597 
3598         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3599         if(err)
3600                 return err;
3601         put_fs_long(val,(unsigned long *)optval);
3602 
3603         return(0);
3604 }       
3605 
3606 
3607 struct proto tcp_prot = {
3608   sock_wmalloc,
3609   sock_rmalloc,
3610   sock_wfree,
3611   sock_rfree,
3612   sock_rspace,
3613   sock_wspace,
3614   tcp_close,
3615   tcp_read,
3616   tcp_write,
3617   tcp_sendto,
3618   tcp_recvfrom,
3619   ip_build_header,
3620   tcp_connect,
3621   tcp_accept,
3622   ip_queue_xmit,
3623   tcp_retransmit,
3624   tcp_write_wakeup,
3625   tcp_read_wakeup,
3626   tcp_rcv,
3627   tcp_select,
3628   tcp_ioctl,
3629   NULL,
3630   tcp_shutdown,
3631   tcp_setsockopt,
3632   tcp_getsockopt,
3633   128,
3634   0,
3635   {NULL,},
3636   "TCP"
3637 };

/* [previous][next][first][last][top][bottom][index][help] */