root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_time_wait
  5. tcp_retransmit
  6. tcp_err
  7. tcp_readable
  8. tcp_select
  9. tcp_ioctl
  10. tcp_check
  11. tcp_send_check
  12. tcp_send_skb
  13. tcp_dequeue_partial
  14. tcp_send_partial
  15. tcp_enqueue_partial
  16. tcp_send_ack
  17. tcp_build_header
  18. tcp_write
  19. tcp_sendto
  20. tcp_read_wakeup
  21. cleanup_rbuf
  22. tcp_read_urg
  23. tcp_read
  24. tcp_shutdown
  25. tcp_recvfrom
  26. tcp_reset
  27. tcp_options
  28. default_mask
  29. tcp_conn_request
  30. tcp_close
  31. tcp_write_xmit
  32. sort_send
  33. tcp_ack
  34. tcp_data
  35. tcp_check_urg
  36. tcp_urg
  37. tcp_fin
  38. tcp_accept
  39. tcp_connect
  40. tcp_sequence
  41. tcp_clean_end
  42. tcp_rcv
  43. tcp_write_wakeup
  44. tcp_send_probe0
  45. tcp_setsockopt
  46. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
  83  *
  84  *
  85  * To Fix:
  86  *                      Possibly a problem with accept(). BSD accept never fails after
  87  *              it causes a select. Linux can - given the official select semantics I
  88  *              feel that _really_ its the BSD network programs that are bust (notably
  89  *              inetd, which hangs occasionally because of this).
  90  *
  91  *                      Fast path the code. Two things here - fix the window calculation
  92  *              so it doesn't iterate over the queue, also spot packets with no funny
  93  *              options arriving in order and process directly.
  94  *
  95  *              This program is free software; you can redistribute it and/or
  96  *              modify it under the terms of the GNU General Public License
  97  *              as published by the Free Software Foundation; either version
  98  *              2 of the License, or(at your option) any later version.
  99  *
 100  * Description of States:
 101  *
 102  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 103  *
 104  *      TCP_SYN_RECV            received a connection request, sent ack,
 105  *                              waiting for final ack in three-way handshake.
 106  *
 107  *      TCP_ESTABLISHED         connection established
 108  *
 109  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 110  *                              transmission of remaining buffered data
 111  *
 112  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 113  *                              to shutdown
 114  *
 115  *      TCP_CLOSING             both sides have shutdown but we still have
 116  *                              data we have to finish sending
 117  *
 118  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 119  *                              closed, can only be entered from FIN_WAIT2
 120  *                              or CLOSING.  Required because the other end
 121  *                              may not have gotten our last ACK causing it
 122  *                              to retransmit the data packet (which we ignore)
 123  *
 124  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 125  *                              us to finish writing our data and to shutdown
 126  *                              (we have to close() to move on to LAST_ACK)
 127  *
 128  *      TCP_LAST_ACK            our side has shutdown after remote has
 129  *                              shutdown.  There may still be data in our
 130  *                              buffer that we have to finish sending
 131  *              
 132  *      TCP_CLOSE               socket is finished
 133  */
 134 #include <linux/types.h>
 135 #include <linux/sched.h>
 136 #include <linux/mm.h>
 137 #include <linux/string.h>
 138 #include <linux/socket.h>
 139 #include <linux/sockios.h>
 140 #include <linux/termios.h>
 141 #include <linux/in.h>
 142 #include <linux/fcntl.h>
 143 #include <linux/inet.h>
 144 #include <linux/netdevice.h>
 145 #include "snmp.h"
 146 #include "ip.h"
 147 #include "protocol.h"
 148 #include "icmp.h"
 149 #include "tcp.h"
 150 #include <linux/skbuff.h>
 151 #include "sock.h"
 152 #include "route.h"
 153 #include <linux/errno.h>
 154 #include <linux/timer.h>
 155 #include <asm/system.h>
 156 #include <asm/segment.h>
 157 #include <linux/mm.h>
 158 
     /* TCP_FASTPATH is forced off here, so the fast-path counters declared
        further down inside #ifdef TCP_FASTPATH are compiled out. */
 159 #undef TCP_FASTPATH
 160 
     /* NOTE(review): neither SEQ_TICK nor seq_offset is referenced in the
        part of the file visible in this review; their users presumably
        live further down - confirm before changing them. */
 161 #define SEQ_TICK 3
 162 unsigned long seq_offset;
     /* TCP counters; this file updates TcpCurrEstab, TcpAttemptFails and
        TcpOutSegs among its fields. */
 163 struct tcp_mib  tcp_statistics;
 164 
 165 #ifdef TCP_FASTPATH
     /* Receive-path hit/miss counters, only built when TCP_FASTPATH is defined. */
 166 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 167 #endif
 168 
 169 
 170 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 171 {
 172         if (a < b) 
 173                 return(a);
 174         return(b);
 175 }
 176 
 177 #undef STATE_TRACE
 178 
 179 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 180 {
 181         if(sk->state==TCP_ESTABLISHED)
 182                 tcp_statistics.TcpCurrEstab--;
 183 #ifdef STATE_TRACE
 184         if(sk->debug)
 185                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 186 #endif  
 187         sk->state=state;
 188         if(state==TCP_ESTABLISHED)
 189                 tcp_statistics.TcpCurrEstab++;
 190 }
 191 
 192 /* This routine picks a TCP windows for a socket based on
 193    the following constraints
 194    
 195    1. The window can never be shrunk once it is offered (RFC 793)
 196    2. We limit memory per socket
 197    
 198    For now we use NET2E3's heuristic of offering half the memory
 199    we have handy. All is not as bad as this seems however because
 200    of two things. Firstly we will bin packets even within the window
 201    in order to get the data we are waiting for into the memory limit.
 202    Secondly we bin common duplicate forms at receive time
 203    
 204    Better heuristics welcome
 205 */
 206    
 207 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 208 {
 209         int new_window = sk->prot->rspace(sk);
 210         
 211         if(sk->window_clamp)
 212                 new_window=min(sk->window_clamp,new_window);
 213 /*
 214  * two things are going on here.  First, we don't ever offer a
 215  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 216  * receiver side of SWS as specified in RFC1122.
 217  * Second, we always give them at least the window they
 218  * had before, in order to avoid retracting window.  This
 219  * is technically allowed, but RFC1122 advises against it and
 220  * in practice it causes trouble.
 221  */
 222         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 223                 return(sk->window);
 224         return(new_window);
 225 }
 226 
 227 /*
 228  *      Enter the time wait state. 
 229  */
 230 
 231 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 232 {
 233         tcp_set_state(sk,TCP_TIME_WAIT);
 234         sk->shutdown = SHUTDOWN_MASK;
 235         if (!sk->dead)
 236                 sk->state_change(sk);
 237         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 238 }
 239 
 240 /*
 241  *      A timer event has trigger a tcp retransmit timeout. The
 242  *      socket xmit queue is ready and set up to send. Because
 243  *      the ack receive code keeps the queue straight we do
 244  *      nothing clever here.
 245  */
 246 
 247 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 248 {
 249         if (all) 
 250         {
 251                 ip_retransmit(sk, all);
 252                 return;
 253         }
 254 
 255         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 256         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 257         sk->cong_count = 0;
 258 
 259         sk->cong_window = 1;
 260 
 261         /* Do the actual retransmit. */
 262         ip_retransmit(sk, all);
 263 }
 264 
 265 
 266 /*
 267  * This routine is called by the ICMP module when it gets some
 268  * sort of error condition.  If err < 0 then the socket should
 269  * be closed and the error returned to the user.  If err > 0
 270  * it's just the icmp type << 8 | icmp code.  After adjustment
 271  * header points to the first 8 bytes of the tcp header.  We need
 272  * to find the appropriate port.
 273  */
 274 
 275 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 276         unsigned long saddr, struct inet_protocol *protocol)
 277 {
 278         struct tcphdr *th;
 279         struct sock *sk;
 280         struct iphdr *iph=(struct iphdr *)header;
 281   
 282         header+=4*iph->ihl;
 283    
 284 
 285         th =(struct tcphdr *)header;
 286         sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 287 
 288         if (sk == NULL) 
 289                 return;
 290   
 291         if(err<0)
 292         {
 293                 sk->err = -err;
 294                 sk->error_report(sk);
 295                 return;
 296         }
 297 
 298         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 299         {
 300                 /*
 301                  * FIXME:
 302                  * For now we will just trigger a linear backoff.
 303                  * The slow start code should cause a real backoff here.
 304                  */
 305                 if (sk->cong_window > 4)
 306                         sk->cong_window--;
 307                 return;
 308         }
 309 
 310 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 311 
 312         /*
 313          * If we've already connected we will keep trying
 314          * until we time out, or the user gives up.
 315          */
 316 
 317         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 318         {
 319                 if (sk->state == TCP_SYN_SENT) 
 320                 {
 321                         tcp_statistics.TcpAttemptFails++;
 322                         tcp_set_state(sk,TCP_CLOSE);
 323                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 324                 }
 325                 sk->err = icmp_err_convert[err & 0xff].errno;           
 326         }
 327         return;
 328 }
 329 
 330 
 331 /*
 332  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 333  *      in the received data queue (ie a frame missing that needs sending to us)
 334  */
 335 
 336 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 337 {
 338         unsigned long counted;
 339         unsigned long amount;
 340         struct sk_buff *skb;
 341         int sum;
 342         unsigned long flags;
 343 
 344         if(sk && sk->debug)
 345                 printk("tcp_readable: %p - ",sk);
 346 
 347         save_flags(flags);
 348         cli();
 349         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 350         {
 351                 restore_flags(flags);
 352                 if(sk && sk->debug) 
 353                         printk("empty\n");
 354                 return(0);
 355         }
 356   
 357         counted = sk->copied_seq+1;     /* Where we are at the moment */
 358         amount = 0;
 359   
 360         /* Do until a push or until we are out of data. */
 361         do 
 362         {
 363                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 364                         break;
 365                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 366                 if (skb->h.th->syn)
 367                         sum++;
 368                 if (sum >= 0) 
 369                 {                                       /* Add it up, move on */
 370                         amount += sum;
 371                         if (skb->h.th->syn) 
 372                                 amount--;
 373                         counted += sum;
 374                 }
 375                 if (amount && skb->h.th->psh) break;
 376                 skb = skb->next;
 377         }
 378         while(skb != (struct sk_buff *)&sk->receive_queue);
 379 
 380         if (amount && !sk->urginline && sk->urg_data &&
 381             (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 382                 amount--;               /* don't count urg data */
 383         restore_flags(flags);
 384         if(sk->debug)
 385                 printk("got %lu bytes.\n",amount);
 386         return(amount);
 387 }
 388 
 389 
 390 /*
 391  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 392  *      listening socket has a receive queue of sockets to accept.
 393  */
 394 
 395 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 396 {
 397         sk->inuse = 1;
 398 
 399         switch(sel_type) 
 400         {
 401                 case SEL_IN:
 402                         if(sk->debug)
 403                                 printk("select in");
 404                         select_wait(sk->sleep, wait);
 405                         if(sk->debug)
 406                                 printk("-select out");
 407                         if (skb_peek(&sk->receive_queue) != NULL) 
 408                         {
 409                                 if (sk->state == TCP_LISTEN || tcp_readable(sk)) 
 410                                 {
 411                                         release_sock(sk);
 412                                         if(sk->debug)
 413                                                 printk("-select ok data\n");
 414                                         return(1);
 415                                 }
 416                         }
 417                         if (sk->err != 0)       /* Receiver error */
 418                         {
 419                                 release_sock(sk);
 420                                 if(sk->debug)
 421                                         printk("-select ok error");
 422                                 return(1);
 423                         }
 424                         if (sk->shutdown & RCV_SHUTDOWN) 
 425                         {
 426                                 release_sock(sk);
 427                                 if(sk->debug)
 428                                         printk("-select ok down\n");
 429                                 return(1);
 430                         } 
 431                         else 
 432                         {
 433                                 release_sock(sk);
 434                                 if(sk->debug)
 435                                         printk("-select fail\n");
 436                                 return(0);
 437                         }
 438                 case SEL_OUT:
 439                         select_wait(sk->sleep, wait);
 440                         if (sk->shutdown & SEND_SHUTDOWN) 
 441                         {
 442                                 /* FIXME: should this return an error? */
 443                                 release_sock(sk);
 444                                 return(0);
 445                         }
 446 
 447                         /*
 448                          * FIXME:
 449                          * Hack so it will probably be able to write
 450                          * something if it says it's ok to write.
 451                          */
 452                         
 453                         if (sk->prot->wspace(sk) >= sk->mss) 
 454                         {
 455                                 release_sock(sk);
 456                                 /* This should cause connect to work ok. */
 457                                 if (sk->state == TCP_SYN_RECV ||
 458                                     sk->state == TCP_SYN_SENT) return(0);
 459                                 return(1);
 460                         }
 461                         release_sock(sk);
 462                         return(0);
 463                 case SEL_EX:
 464                         select_wait(sk->sleep,wait);
 465                         if (sk->err || sk->urg_data) 
 466                         {
 467                                 release_sock(sk);
 468                                 return(1);
 469                         }
 470                         release_sock(sk);
 471                         return(0);
 472         }
 473 
 474         release_sock(sk);
 475         return(0);
 476 }
 477 
 478 
 479 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 480 {
 481         int err;
 482         switch(cmd) 
 483         {
 484 
 485                 case TIOCINQ:
 486 #ifdef FIXME    /* FIXME: */
 487                 case FIONREAD:
 488 #endif
 489                 {
 490                         unsigned long amount;
 491 
 492                         if (sk->state == TCP_LISTEN) 
 493                                 return(-EINVAL);
 494 
 495                         sk->inuse = 1;
 496                         amount = tcp_readable(sk);
 497                         release_sock(sk);
 498                         err=verify_area(VERIFY_WRITE,(void *)arg,
 499                                                    sizeof(unsigned long));
 500                         if(err)
 501                                 return err;
 502                         put_fs_long(amount,(unsigned long *)arg);
 503                         return(0);
 504                 }
 505                 case SIOCATMARK:
 506                 {
 507                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 508 
 509                         err = verify_area(VERIFY_WRITE,(void *) arg,
 510                                                   sizeof(unsigned long));
 511                         if (err)
 512                                 return err;
 513                         put_fs_long(answ,(int *) arg);
 514                         return(0);
 515                 }
 516                 case TIOCOUTQ:
 517                 {
 518                         unsigned long amount;
 519 
 520                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 521                         amount = sk->prot->wspace(sk);
 522                         err=verify_area(VERIFY_WRITE,(void *)arg,
 523                                                    sizeof(unsigned long));
 524                         if(err)
 525                                 return err;
 526                         put_fs_long(amount,(unsigned long *)arg);
 527                         return(0);
 528                 }
 529                 default:
 530                         return(-EINVAL);
 531         }
 532 }
 533 
 534 
 535 /*
 536  *      This routine computes a TCP checksum. 
      *
      *      th      start of the TCP header; len bytes are summed from here
      *      len     number of bytes to sum (header plus data)
      *      saddr   source address; 0 means "use ip_my_addr()"
      *      daddr   destination address
      *
      *      The sum starts from both addresses plus a word built from the
      *      length and IPPROTO_TCP, then the len bytes at th are added with
      *      end-around carry.  The result is folded to 16 bits and returned
      *      complemented.
      *
      *      The code is i386 inline assembly: it uses %ebx as accumulator,
      *      %esi as the read pointer (lodsl/lodsw/lodsb) and depends on the
      *      carry flag surviving between the adcl instructions.
 537  */
 538  
 539 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 540           unsigned long saddr, unsigned long daddr)
 541 {     
 542         unsigned long sum;
 543    
 544         if (saddr == 0) saddr = ip_my_addr();
 545 
 546 /*
 547  * stupid, gcc complains when I use just one __asm__ block,
 548  * something about too many reloads, but this is just two
 549  * instructions longer than what I want
      *
      * First block: sum = daddr + saddr + length/protocol word, with the
      * final carry added back in.
 550  */
 551         __asm__("
 552             addl %%ecx, %%ebx
 553             adcl %%edx, %%ebx
 554             adcl $0, %%ebx
 555             "
 556         : "=b"(sum)
 557         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 558         : "bx", "cx", "dx" );
     /*
      * Second block: add the len bytes starting at th to the running sum.
      *   - len is kept in %edx for the tail tests;
      *   - label 1: when len >= 32, len>>5 passes of eight 32-bit words each;
      *   - label 3: (len & 28)>>2 remaining 32-bit words;
      *   - label 4: one 16-bit word when bit 1 of len is set;
      *   - label 5: one final byte when bit 0 of len is set;
      *   - label 6: add the upper 16 bits of the sum into the lower 16,
      *     with carry.
      */
 559         __asm__("
 560             movl %%ecx, %%edx
 561             cld
 562             cmpl $32, %%ecx
 563             jb 2f
 564             shrl $5, %%ecx
 565             clc
 566 1:          lodsl
 567             adcl %%eax, %%ebx
 568             lodsl
 569             adcl %%eax, %%ebx
 570             lodsl
 571             adcl %%eax, %%ebx
 572             lodsl
 573             adcl %%eax, %%ebx
 574             lodsl
 575             adcl %%eax, %%ebx
 576             lodsl
 577             adcl %%eax, %%ebx
 578             lodsl
 579             adcl %%eax, %%ebx
 580             lodsl
 581             adcl %%eax, %%ebx
 582             loop 1b
 583             adcl $0, %%ebx
 584             movl %%edx, %%ecx
 585 2:          andl $28, %%ecx
 586             je 4f
 587             shrl $2, %%ecx
 588             clc
 589 3:          lodsl
 590             adcl %%eax, %%ebx
 591             loop 3b
 592             adcl $0, %%ebx
 593 4:          movl $0, %%eax
 594             testw $2, %%dx
 595             je 5f
 596             lodsw
 597             addl %%eax, %%ebx
 598             adcl $0, %%ebx
 599             movw $0, %%ax
 600 5:          test $1, %%edx
 601             je 6f
 602             lodsb
 603             addl %%eax, %%ebx
 604             adcl $0, %%ebx
 605 6:          movl %%ebx, %%eax
 606             shrl $16, %%eax
 607             addw %%ax, %%bx
 608             adcw $0, %%bx
 609             "
 610         : "=b"(sum)
 611         : "0"(sum), "c"(len), "S"(th)
 612         : "ax", "bx", "cx", "dx", "si" );
 613 
 614         /* We only want the bottom 16 bits, but we never cleared the top 16:
                the final fold above works on %bx only, so the upper half of
                sum still holds leftovers and is masked off here. */
 615   
 616         return((~sum) & 0xffff);
 617 }
 618 
 619 
 620 
 621 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 622                 unsigned long daddr, int len, struct sock *sk)
 623 {
 624         th->check = 0;
 625         th->check = tcp_check(th, len, saddr, daddr);
 626         return;
 627 }
 628 
 629 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 630 {
 631         int size;
 632         struct tcphdr * th = skb->h.th;
 633 
 634         /* length of packet (not counting length of pre-tcp headers) */
 635         size = skb->len - ((unsigned char *) th - skb->data);
 636 
 637         /* sanity check it.. */
 638         if (size < sizeof(struct tcphdr) || size > skb->len) 
 639         {
 640                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 641                         skb, skb->data, th, skb->len);
 642                 kfree_skb(skb, FREE_WRITE);
 643                 return;
 644         }
 645 
 646         /* If we have queued a header size packet.. */
 647         if (size == sizeof(struct tcphdr)) 
 648         {
 649                 /* If its got a syn or fin its notionally included in the size..*/
 650                 if(!th->syn && !th->fin) 
 651                 {
 652                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 653                         kfree_skb(skb,FREE_WRITE);
 654                         return;
 655                 }
 656         }
 657 
 658         tcp_statistics.TcpOutSegs++;  
 659 
 660         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 661         if (after(skb->h.seq, sk->window_seq) ||
 662             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 663              sk->packets_out >= sk->cong_window) 
 664         {
 665                 /* checksum will be supplied by tcp_write_xmit.  So
 666                  * we shouldn't need to set it at all.  I'm being paranoid */
 667                 th->check = 0;
 668                 if (skb->next != NULL) 
 669                 {
 670                         printk("tcp_send_partial: next != NULL\n");
 671                         skb_unlink(skb);
 672                 }
 673                 skb_queue_tail(&sk->write_queue, skb);
 674                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 675                     sk->send_head == NULL &&
 676                     sk->ack_backlog == 0)
 677                         reset_timer(sk, TIME_PROBE0, sk->rto);
 678         } 
 679         else 
 680         {
 681                 th->ack_seq = ntohl(sk->acked_seq);
 682                 th->window = ntohs(tcp_select_window(sk));
 683 
 684                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 685 
 686                 sk->sent_seq = sk->write_seq;
 687                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 688         }
 689 }
 690 
 691 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 692 {
 693         struct sk_buff * skb;
 694         unsigned long flags;
 695 
 696         save_flags(flags);
 697         cli();
 698         skb = sk->partial;
 699         if (skb) {
 700                 sk->partial = NULL;
 701                 del_timer(&sk->partial_timer);
 702         }
 703         restore_flags(flags);
 704         return skb;
 705 }
 706 
 707 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 708 {
 709         struct sk_buff *skb;
 710 
 711         if (sk == NULL)
 712                 return;
 713         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 714                 tcp_send_skb(sk, skb);
 715 }
 716 
 717 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /*
      *      Park skb as the socket's partial packet and arm partial_timer.
      *
      *      Inside a save_flags()/cli() section the previously parked
      *      buffer (if any) is remembered and its timer deleted, skb takes
      *      its place, and the timer is re-initialised with expires = HZ,
      *      tcp_send_partial as handler and the socket as handler data.
      *      After the flags are restored, a displaced buffer is handed to
      *      tcp_send_skb().
      */
 718 {
 719         unsigned long saved_flags;
 720         struct sk_buff *displaced;
             /* swap the parked buffer and re-arm the timer atomically */
 721         save_flags(saved_flags);
 722         cli();
 723         displaced = sk->partial;
 724         if (displaced != NULL)
 725                 del_timer(&sk->partial_timer);
 726         sk->partial = skb;
 727         init_timer(&sk->partial_timer);
 728         sk->partial_timer.data = (unsigned long) sk;
 729         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 730         sk->partial_timer.expires = HZ;
 731         add_timer(&sk->partial_timer);
 732         restore_flags(saved_flags);
             /* a displaced buffer is transmitted only after the flags are restored */
 733         if (displaced == NULL)
 734                 return;
 735         tcp_send_skb(sk, displaced);
 736 }
 737 
 738 
 739 /*
 740  *      This routine sends an ack and also updates the window. 
      *
      *      sequence - sequence number stored in the outgoing header
      *      ack      - acknowledgement number stored in the outgoing header
      *      sk       - socket the ack is sent for
      *      th       - TCP header being answered; it is copied as a template
      *                 and its source and dest ports are swapped
      *      daddr    - destination address handed to build_header() and to
      *                 tcp_send_check()
      *
      *      Nothing is sent when sk->zapped is set.  If no buffer can be
      *      allocated, sk->ack_backlog is incremented and, on a connected
      *      socket whose timeout is not already TIME_WRITE, the TIME_WRITE
      *      timer is reset to force an ack later.  If build_header() fails
      *      the buffer is released and nothing is sent.
 741  */
 742  
 743 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 744              struct sock *sk,
 745              struct tcphdr *th, unsigned long daddr)
 746 {
 747         struct sk_buff *buff;
 748         struct tcphdr *t1;
 749         struct device *dev = NULL;
 750         int tmp;
 751 
 752         if(sk->zapped)
 753                 return;         /* We have been reset, we may not send again */
 754         /*
 755          * We need to grab some memory, and put together an ack,
 756          * and then put it into the queue to be sent.
 757          */
 758 
 759         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 760         if (buff == NULL) 
 761         {
 762                 /* Force it to send an ack. */
 763                 sk->ack_backlog++;
 764                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 765                 {
 766                         reset_timer(sk, TIME_WRITE, 10);
 767                 }
 768                 return;
 769         }
 770 
 771         buff->len = sizeof(struct tcphdr);
 772         buff->sk = sk;
 773         buff->localroute = sk->localroute;
 774         t1 =(struct tcphdr *) buff->data;
 775 
 776         /* Put in the IP header and routing stuff. */
 777         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 778                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 779         if (tmp < 0) 
 780         {
 781                 buff->free=1;
 782                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 783                 return;
 784         }
 785         buff->len += tmp;
             /* the TCP header starts tmp bytes into the buffer, after what build_header() wrote */
 786         t1 =(struct tcphdr *)((char *)t1 +tmp);
 787 
 788         /* FIXME: */
 789         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 790 
 791         /*
 792          *      Swap the send and the receive. 
 793          */
 794          
 795         t1->dest = th->source;
 796         t1->source = th->dest;
             /* host values go onto the wire: use the host-to-network macros */
 797         t1->seq = htonl(sequence);
 798         t1->ack = 1;
 799         sk->window = tcp_select_window(sk);
 800         t1->window = htons(sk->window);
 801         t1->res1 = 0;
 802         t1->res2 = 0;
 803         t1->rst = 0;
 804         t1->urg = 0;
 805         t1->syn = 0;
 806         t1->psh = 0;
 807         t1->fin = 0;
             /*
              * When this ack covers everything received so far, the pending
              * ack bookkeeping is cleared.  If in addition nothing is queued
              * for sending and the running timeout is TIME_WRITE, that timer
              * is replaced by the keepalive timer or deleted.
              */
 808         if (ack == sk->acked_seq) 
 809         {
 810                 sk->ack_backlog = 0;
 811                 sk->bytes_rcv = 0;
 812                 sk->ack_timed = 0;
 813                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 814                                   && sk->timeout == TIME_WRITE) 
 815                 {
 816                         if(sk->keepopen) {
 817                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 818                         } else {
 819                                 delete_timer(sk);
 820                         }
 821                 }
 822         }
 823         t1->ack_seq = htonl(ack);
 824         t1->doff = sizeof(*t1)/4;
 825         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 826         if (sk->debug)
 827                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 828         tcp_statistics.TcpOutSegs++;
 829         sk->prot->queue_xmit(sk, dev, buff, 1);
 830 }
 831 
 832 
 833 /* 
 834  *      Fill in the TCP header of an outgoing segment for socket sk.
      *
      *      The header starts as a copy of sk->dummy_th.  seq is taken from
      *      sk->write_seq and ack_seq from sk->acked_seq, ACK is set, FIN is
      *      cleared, and PSH is set only when push is 0.  The socket's
      *      ack_backlog, bytes_rcv and ack_timed counters are zeroed, and a
      *      freshly selected window is stored in sk->window and in the
      *      header.
      *
      *      Returns sizeof(struct tcphdr).
 835  */
 836  
 837 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 838 {
 839         /* FIXME: the template copy should eventually go away. */
 840         memcpy(th, (void *) &(sk->dummy_th), sizeof(*th));
             /* per-segment header fields */
 841         th->doff = sizeof(*th) / 4;
 842         th->fin = 0;
 843         th->ack = 1;
 844         th->psh = !push;
 845         th->seq = htonl(sk->write_seq);
 846         th->ack_seq = htonl(sk->acked_seq);
             /* reset the ack bookkeeping, then advertise a new window */
 847         sk->ack_backlog = 0;
 848         sk->bytes_rcv = 0;
 849         sk->ack_timed = 0;
 850         sk->window = tcp_select_window(sk);
 851         th->window = htons(sk->window);
 852 
 853         return sizeof(*th);
 854 }
 856 
 857 /*
 858  *      This routine copies from a user buffer into a socket,
 859  *      and starts the transmit system.
      *
      *      sk       - socket to write on
      *      from     - user-space source buffer (read with memcpy_fromfs())
      *      len      - number of bytes requested
      *      nonblock - when non-zero, return -EAGAIN instead of sleeping
      *      flags    - MSG_OOB marks the data urgent; MSG_DONTROUTE is
      *                 or'ed into skb->localroute
      *
      *      Returns the number of bytes accepted.  If nothing was accepted
      *      before a failure, the result is a negative error: the negated
      *      sk->err, -EPIPE, -EAGAIN, -ERESTARTSYS, or the negative value
      *      returned by prot->build_header().
 860  */
 861 
 862 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 863           int len, int nonblock, unsigned flags)
 864 {
 865         int copied = 0;
 866         int copy;
 867         int tmp;
 868         struct sk_buff *skb;
 869         struct sk_buff *send_tmp;
 870         unsigned char *buff;
 871         struct proto *prot;
 872         struct device *dev = NULL;
 873 
 874         sk->inuse=1;
 875         prot = sk->prot;
 876         while(len > 0) 
 877         {
 878                 if (sk->err) 
 879                 {                       /* Stop on an error */
 880                         release_sock(sk);
 881                         if (copied) 
 882                                 return(copied);
 883                         tmp = -sk->err;
 884                         sk->err = 0;
 885                         return(tmp);
 886                 }
 887 
 888         /*
 889          *      First thing we do is make sure that we are established. 
 890          */
 891         
 892                 if (sk->shutdown & SEND_SHUTDOWN) 
 893                 {
 894                         release_sock(sk);
 895                         sk->err = EPIPE;
 896                         if (copied) 
 897                                 return(copied);
 898                         sk->err = 0;
 899                         return(-EPIPE);
 900                 }
 901 
 902 
 903         /* 
 904          *      Wait for a connection to finish.
 905          */
 906         
 907                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 908                 {
 909                         if (sk->err) 
 910                         {
 911                                 release_sock(sk);
 912                                 if (copied) 
 913                                         return(copied);
 914                                 tmp = -sk->err;
 915                                 sk->err = 0;
 916                                 return(tmp);
 917                         }
 918 
                             /* neither connected nor connecting: the write cannot proceed */
 919                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 920                         {
 921                                 release_sock(sk);
 922                                 if (copied) 
 923                                         return(copied);
 924 
 925                                 if (sk->err) 
 926                                 {
 927                                         tmp = -sk->err;
 928                                         sk->err = 0;
 929                                         return(tmp);
 930                                 }
 931 
 932                                 if (sk->keepopen) 
 933                                 {
 934                                         send_sig(SIGPIPE, current, 0);
 935                                 }
 936                                 return(-EPIPE);
 937                         }
 938 
 939                         if (nonblock || copied) 
 940                         {
 941                                 release_sock(sk);
 942                                 if (copied) 
 943                                         return(copied);
 944                                 return(-EAGAIN);
 945                         }
 946 
 947                         release_sock(sk);
 948                         cli();
 949                 
                             /* the state is re-tested after cli() before going to sleep */
 950                         if (sk->state != TCP_ESTABLISHED &&
 951                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 952                         {
 953                                 interruptible_sleep_on(sk->sleep);
 954                                 if (current->signal & ~current->blocked) 
 955                                 {
 956                                         sti();
 957                                         if (copied) 
 958                                                 return(copied);
 959                                         return(-ERESTARTSYS);
 960                                 }
 961                         }
 962                         sk->inuse = 1;
 963                         sti();
 964                 }
 965 
 966         /*
 967          * The following code can result in copy <= 0 if sk->mss is ever
 968          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 969          * sk->mtu is constant once SYN processing is finished.  I.e. we
 970          * had better not get here until we've seen his SYN and at least one
 971          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 972          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 973          * non-decreasing.  Note that any ioctl to set user_mss must be done
 974          * before the exchange of SYN's.  If the initial ack from the other
 975          * end has a window of 0, max_window and thus mss will both be 0.
 976          */
 977 
 978         /* 
 979          *      Now we need to check if we have a half built packet. 
 980          */
 981 
 982                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
 983                 {
 984                         int hdrlen;
 985 
 986                          /* IP header + TCP header */
 987                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 988                                  + sizeof(struct tcphdr);
 989         
 990                         /* Add more stuff to the end of skb->len */
 991                         if (!(flags & MSG_OOB)) 
 992                         {
 993                                 copy = min(sk->mss - (skb->len - hdrlen), len);
 994                                 /* FIXME: this is really a bug. */
 995                                 if (copy <= 0) 
 996                                 {
 997                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
 998                                         copy = 0;
 999                                 }
1000           
1001                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1002                                 skb->len += copy;
1003                                 from += copy;
1004                                 copied += copy;
1005                                 len -= copy;
1006                                 sk->write_seq += copy;
1007                         }
                             /*
                              * Send now if the packet holds a full mss of data, the
                              * write is urgent, or no packets are outstanding;
                              * otherwise park it again as the partial packet.
                              */
1008                         if ((skb->len - hdrlen) >= sk->mss ||
1009                                 (flags & MSG_OOB) || !sk->packets_out)
1010                                 tcp_send_skb(sk, skb);
1011                         else
1012                                 tcp_enqueue_partial(skb, sk);
1013                         continue;
1014                 }
1015 
1016         /*
1017          * We also need to worry about the window.
1018          * If window < 1/2 the maximum window we've seen from this
1019          *   host, don't use it.  This is sender side
1020          *   silly window prevention, as specified in RFC1122.
1021          *   (Note that this is different than earlier versions of
1022          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1023          *   use the whole MSS.  Since this results in the right
1024          *   edge of the packet being outside the window, it will
1025          *   be queued for later rather than sent.
1026          */
1027 
1028                 copy = sk->window_seq - sk->write_seq;
1029                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1030                         copy = sk->mss;
1031                 if (copy > len)
1032                         copy = len;
1033 
1034         /*
1035          *      We should really check the window here also. 
1036          */
1037          
1038                 send_tmp = NULL;
                     /*
                      * A short, non-urgent write gets a buffer sized from sk->mtu
                      * and is remembered in send_tmp so it may be parked as a
                      * partial packet below.
                      */
1039                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1040                 {
1041                         /*
1042                          *      We will release the socket in case we sleep here. 
1043                          */
1044                         release_sock(sk);
1045                         /*
1046                          *      NB: following must be mtu, because mss can be increased.
1047                          *      mss is always <= mtu 
1048                          */
1049                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1050                         sk->inuse = 1;
1051                         send_tmp = skb;
1052                 } 
1053                 else 
1054                 {
1055                         /*
1056                          *      We will release the socket in case we sleep here. 
1057                          */
1058                         release_sock(sk);
1059                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1060                         sk->inuse = 1;
1061                 }
1062 
1063                 /*
1064                  *      If we didn't get any memory, we need to sleep. 
1065                  */
1066 
1067                 if (skb == NULL) 
1068                 {
1069                         if (nonblock /* || copied */) 
1070                         {
1071                                 release_sock(sk);
1072                                 if (copied) 
1073                                         return(copied);
1074                                 return(-EAGAIN);
1075                         }
1076 
1077                         /*
1078                          *      FIXME: here is another race condition. 
1079                          */
1080 
1081                         tmp = sk->wmem_alloc;
1082                         release_sock(sk);
1083                         cli();
1084                         /*
1085                          *      Again we will try to avoid it. 
1086                          */
1087                         if (tmp <= sk->wmem_alloc &&
1088                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1089                                 && sk->err == 0) 
1090                         {
1091                                 interruptible_sleep_on(sk->sleep);
1092                                 if (current->signal & ~current->blocked) 
1093                                 {
1094                                         sti();
1095                                         if (copied) 
1096                                                 return(copied);
1097                                         return(-ERESTARTSYS);
1098                                 }
1099                         }
1100                         sk->inuse = 1;
1101                         sti();
1102                         continue;
1103                 }
1104 
1105                 skb->len = 0;
1106                 skb->sk = sk;
1107                 skb->free = 0;
1108                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1109         
1110                 buff = skb->data;
1111         
1112                 /*
1113                  * FIXME: we need to optimize this.
1114                  * Perhaps some hints here would be good.
1115                  */
1116                 
1117                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1118                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1119                 if (tmp < 0 ) 
1120                 {
1121                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1122                         release_sock(sk);
1123                         if (copied) 
1124                                 return(copied);
1125                         return(tmp);
1126                 }
1127                 skb->len += tmp;
1128                 skb->dev = dev;
1129                 buff += tmp;
1130                 skb->h.th =(struct tcphdr *) buff;
                     /* the third argument is 0 only when this packet takes the last of the data */
1131                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1132                 if (tmp < 0) 
1133                 {
1134                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1135                         release_sock(sk);
1136                         if (copied) 
1137                                 return(copied);
1138                         return(tmp);
1139                 }
1140 
1141                 if (flags & MSG_OOB) 
1142                 {
1143                         ((struct tcphdr *)buff)->urg = 1;
                             /* host value going into the header: host-to-network conversion */
1144                         ((struct tcphdr *)buff)->urg_ptr = htons(copy);
1145                 }
1146                 skb->len += tmp;
1147                 memcpy_fromfs(buff+tmp, from, copy);
1148 
1149                 from += copy;
1150                 copied += copy;
1151                 len -= copy;
1152                 skb->len += copy;
1153                 skb->free = 0;
1154                 sk->write_seq += copy;
1155         
1156                 if (send_tmp != NULL && sk->packets_out) 
1157                 {
1158                         tcp_enqueue_partial(send_tmp, sk);
1159                         continue;
1160                 }
1161                 tcp_send_skb(sk, skb);
1162         }
1163         sk->err = 0;
1164 
1165 /*
1166  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1167  *      interactive fast network servers. It's meant to be on and
1168  *      it really improves the throughput though not the echo time
1169  *      on my slow slip link - Alan
1170  */
1171 
1172 /*
1173  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1174  */
1175  
1176         if(sk->partial && ((!sk->packets_out) 
1177      /* If not nagling we can send on the before case too.. */
1178               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1179         ))
1180                 tcp_send_partial(sk);
1181 
1182         release_sock(sk);
1183         return(copied);
1184 }
1185 
1186 
1187 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1188            int len, int nonblock, unsigned flags,
1189            struct sockaddr_in *addr, int addr_len)
1190 {
             /*
              *      sendto() on a TCP socket: validate the arguments, then
              *      hand the data to tcp_write().
              *
              *      Returns -EINVAL for flags other than MSG_OOB and
              *      MSG_DONTROUTE, for an address shorter than
              *      struct sockaddr_in, or for a non-zero family other than
              *      AF_INET.  Returns -EISCONN when the port or address
              *      differs from sk->dummy_th.dest / sk->daddr.  Otherwise
              *      returns whatever tcp_write() returns.
              */
1191         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1192                 return -EINVAL;
             /*
              * addr_len is signed.  Compare it as an int so that a negative
              * length is rejected; compared directly against sizeof it would
              * be converted to a large unsigned value and slip through.
              */
1193         if (addr_len < (int) sizeof(*addr)) 
1194                 return(-EINVAL);
1195         if (addr->sin_family && addr->sin_family != AF_INET) 
1196                 return(-EINVAL);
1197         if (addr->sin_port != sk->dummy_th.dest) 
1198                 return(-EISCONN);
1199         if (addr->sin_addr.s_addr != sk->daddr) 
1200                 return(-EISCONN);
1201         return(tcp_write(sk, from, len, nonblock, flags));
1202 }
1203 
1204 
1205 static void tcp_read_wakeup(struct sock *sk)
     /*
      *      Send a header-only ack carrying the current receive window.
      *
      *      Does nothing unless sk->ack_backlog is non-zero.  The header is
      *      copied from sk->dummy_th, stamped with sk->sent_seq and
      *      sk->acked_seq, given a freshly selected window, checksummed and
      *      passed to queue_xmit().  If no buffer is available the
      *      TIME_WRITE timer is reset so the attempt is repeated; if
      *      build_header() fails the buffer is released and nothing is sent.
      */
1206 {
1207         int tmp;
1208         struct device *dev = NULL;
1209         struct tcphdr *t1;
1210         struct sk_buff *buff;
1211 
1212         if (!sk->ack_backlog) 
1213                 return;
1214 
1215         /*
1216          * FIXME: we need to put code here to prevent this routine from
1217          * being called.  Being called once in a while is ok, so only check
1218          * if this is the second time in a row.
1219          */
1220 
1221         /*
1222          * We need to grab some memory, and put together an ack,
1223          * and then put it into the queue to be sent.
1224          */
1225 
1226         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1227         if (buff == NULL) 
1228         {
1229                 /* Try again real soon. */
1230                 reset_timer(sk, TIME_WRITE, 10);
1231                 return;
1232         }
1233 
1234         buff->len = sizeof(struct tcphdr);
1235         buff->sk = sk;
1236         buff->localroute = sk->localroute;
1237         
1238         /*
1239          *      Put in the IP header and routing stuff. 
1240          */
1241 
1242         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1243                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1244         if (tmp < 0) 
1245         {
1246                 buff->free=1;
1247                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1248                 return;
1249         }
1250 
1251         buff->len += tmp;
             /* the TCP header starts tmp bytes into the buffer, after what build_header() wrote */
1252         t1 =(struct tcphdr *)(buff->data +tmp);
1253 
1254         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1255         t1->seq = htonl(sk->sent_seq);
1256         t1->ack = 1;
1257         t1->res1 = 0;
1258         t1->res2 = 0;
1259         t1->rst = 0;
1260         t1->urg = 0;
1261         t1->syn = 0;
1262         t1->psh = 0;
1263         sk->ack_backlog = 0;
1264         sk->bytes_rcv = 0;
1265         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
             /* host values go onto the wire: host-to-network macros, as for seq above */
1266         t1->window = htons(sk->window);
1267         t1->ack_seq = htonl(sk->acked_seq);
1268         t1->doff = sizeof(*t1)/4;
1269         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1270         sk->prot->queue_xmit(sk, dev, buff, 1);
1271         tcp_statistics.TcpOutSegs++;
1272 }
1273 
1274 
1275 /*
1276  *      FIXME:
1277  *      This routine frees used buffers.
1278  *      It should consider sending an ACK to let the
1279  *      other end know we now have a bigger window.
      *
      *      Buffers are taken from the head of sk->receive_queue for as
      *      long as they are marked used; the walk stops at the first one
      *      that is not.  If the value reported by sk->prot->rspace()
      *      differs afterwards, sk->ack_backlog is incremented and an ack
      *      is either sent at once through tcp_read_wakeup() or scheduled
      *      through the socket timer.
1280  */
1281 
1282 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1283 {
1284         unsigned long flags;
1285         unsigned long left;
1286         struct sk_buff *skb;
1287         unsigned long rspace;
1288 
1289         if(sk->debug)
1290                 printk("cleaning rbuf for sk=%p\n", sk);
1291   
1292         save_flags(flags);
1293         cli();
1294   
             /* receive space before anything is freed, for the comparison below */
1295         left = sk->prot->rspace(sk);
1296  
1297         /*
1298          * We have to loop through all the buffer headers,
1299          * and try to free up all the space we can.
1300          */
1301 
1302         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1303         {
1304                 if (!skb->used) 
1305                         break;
1306                 skb_unlink(skb);
1307                 skb->sk = sk;
1308                 kfree_skb(skb, FREE_READ);
1309         }
1310 
1311         restore_flags(flags);
1312 
1313         /*
1314          * FIXME:
1315          * At this point we should send an ack if the difference
1316          * in the window, and the amount of space is bigger than
1317          * TCP_WINDOW_DIFF.
1318          */
1319 
1320         if(sk->debug)
1321                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1322                                             left);
1323         if ((rspace=sk->prot->rspace(sk)) != left) 
1324         {
1325                 /*
1326                  * This area has caused the most trouble.  The current strategy
1327                  * is to simply do nothing if the other end has room to send at
1328                  * least 3 full packets, because the ack from those will auto-
1329                  * matically update the window.  If the other end doesn't think
1330                  * we have much space left, but we have room for at least 1 more
1331                  * complete packet than it thinks we do, we will send an ack
1332                  * immediately.  Otherwise we will wait up to .5 seconds in case
1333                  * the user reads some more.
1334                  */
1335                 sk->ack_backlog++;
1336         /*
1337          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1338          * if the other end is offering a window smaller than the agreed on MSS
1339          * (called sk->mtu here).  In theory there's no connection between send
1340          * and receive, and so no reason to think that they're going to send
1341          * small packets.  For the moment I'm using the hack of reducing the mss
1342          * only on the send side, so I'm putting mtu here.
1343          */
1344 
1345                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1346                 {
1347                         /* Send an ack right now. */
1348                         tcp_read_wakeup(sk);
1349                 } 
1350                 else 
1351                 {
1352                         /* Force it to send an ack soon. */
                             /*
                              * The socket timer is taken off first.  It is reset
                              * to TIME_WRITE / TCP_ACK_TIME when it was not
                              * running or when TCP_ACK_TIME is smaller than its
                              * expires value; otherwise it is put back unchanged.
                              */
1353                         int was_active = del_timer(&sk->timer);
1354                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1355                         {
1356                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1357                         } 
1358                         else
1359                                 add_timer(&sk->timer);
1360                 }
1361         }
1362 } 
1363 
1364 
1365 /*
1366  *      Fetch the single byte of urgent (out of band) data for a read
      *      issued with MSG_OOB.
      *
      *      Returns 1 after storing the byte at `to` with put_fs_byte(), and
      *      marks it read unless MSG_PEEK is set.  Returns 0 when len is not
      *      positive, or on the first call after the socket is closed or
      *      its receive side shut down (sk->done is then set).  Returns
      *      -EINVAL when urgent data is delivered inline, absent or already
      *      read; the negated sk->err when an error is pending; -ENOTCONN
      *      when sk->done is already set; -EAGAIN for a non-blocking call;
      *      -ERESTARTSYS when an unblocked signal is pending.  Otherwise
      *      the caller sleeps on sk->sleep and the checks are repeated.
1367  */
1368  
1369 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1370              unsigned char *to, int len, unsigned flags)
1371 {
1372         struct wait_queue wait = { current, NULL };
1373         int err;
1374         char urgent_byte;
1375 
             /* len never changes below, so it only has to be tested once */
1376         if (len <= 0)
1377                 return 0;
1378 
1379         for (;;) 
1380         {
1381                 if (sk->urginline || sk->urg_data == 0 || sk->urg_data == URG_READ)
1382                         return -EINVAL;
1383 
                     /* the byte has arrived: hand it over */
1384                 if (sk->urg_data & URG_VALID) 
1385                 {
1386                         urgent_byte = sk->urg_data;
1387                         if ((flags & MSG_PEEK) == 0)
1388                                 sk->urg_data = URG_READ;
1389                         put_fs_byte(urgent_byte, to);
1390                         return 1;
1391                 }
1392 
                     /* a pending socket error is reported once and cleared */
1393                 err = sk->err;
1394                 if (err != 0) 
1395                 {
1396                         sk->err = 0;
1397                         return -err;
1398                 }
1399 
                     /* end of stream: 0 the first time, -ENOTCONN afterwards */
1400                 if (sk->done)
1401                         return -ENOTCONN;
1402                 if (sk->state == TCP_CLOSE || (sk->shutdown & RCV_SHUTDOWN)) 
1403                 {
1404                         sk->done = 1;
1405                         return 0;
1406                 }
1407 
1408                 if (nonblock)
1409                         return -EAGAIN;
1410                 if (current->signal & ~current->blocked)
1411                         return -ERESTARTSYS;
1412 
                     /* sleep only while the urgent byte is still announced but not here */
1413                 current->state = TASK_INTERRUPTIBLE;
1414                 add_wait_queue(sk->sleep, &wait);
1415                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1416                     !(sk->shutdown & RCV_SHUTDOWN))
1417                         schedule();
1418                 remove_wait_queue(sk->sleep, &wait);
1419                 current->state = TASK_RUNNING;
1420         }
1421 }
1425 
1426 
1427 /*
1428  *      This routine copies from a sock struct into the user buffer. 
1429  */
1430  
1431 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1432         int len, int nonblock, unsigned flags)
1433 {
1434         struct wait_queue wait = { current, NULL };
1435         int copied = 0;
1436         unsigned long peek_seq;
1437         unsigned long *seq;
1438         unsigned long used;
1439 
1440         /* This error should be checked. */
1441         if (sk->state == TCP_LISTEN)
1442                 return -ENOTCONN;
1443 
1444         /* Urgent data needs to be handled specially. */
1445         if (flags & MSG_OOB)
1446                 return tcp_read_urg(sk, nonblock, to, len, flags);
1447 
1448         peek_seq = sk->copied_seq;
1449         seq = &sk->copied_seq;
1450         if (flags & MSG_PEEK)
1451                 seq = &peek_seq;
1452 
1453         add_wait_queue(sk->sleep, &wait);
1454         sk->inuse = 1;
1455         while (len > 0) 
1456         {
1457                 struct sk_buff * skb;
1458                 unsigned long offset;
1459         
1460                 /*
1461                  * are we at urgent data? Stop if we have read anything.
1462                  */
1463                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1464                         break;
1465 
1466                 current->state = TASK_INTERRUPTIBLE;
1467 
1468                 skb = skb_peek(&sk->receive_queue);
1469                 do 
1470                 {
1471                         if (!skb)
1472                                 break;
1473                         if (before(1+*seq, skb->h.th->seq))
1474                                 break;
1475                         offset = 1 + *seq - skb->h.th->seq;
1476                         if (skb->h.th->syn)
1477                                 offset--;
1478                         if (offset < skb->len)
1479                                 goto found_ok_skb;
1480                         if (!(flags & MSG_PEEK))
1481                                 skb->used = 1;
1482                         skb = skb->next;
1483                 }
1484                 while (skb != (struct sk_buff *)&sk->receive_queue);
1485 
1486                 if (copied)
1487                         break;
1488 
1489                 if (sk->err) 
1490                 {
1491                         copied = -sk->err;
1492                         sk->err = 0;
1493                         break;
1494                 }
1495 
1496                 if (sk->state == TCP_CLOSE) 
1497                 {
1498                         if (!sk->done) 
1499                         {
1500                                 sk->done = 1;
1501                                 break;
1502                         }
1503                         copied = -ENOTCONN;
1504                         break;
1505                 }
1506 
1507                 if (sk->shutdown & RCV_SHUTDOWN) 
1508                 {
1509                         sk->done = 1;
1510                         break;
1511                 }
1512                         
1513                 if (nonblock) 
1514                 {
1515                         copied = -EAGAIN;
1516                         break;
1517                 }
1518 
1519                 cleanup_rbuf(sk);
1520                 release_sock(sk);
1521                 schedule();
1522                 sk->inuse = 1;
1523 
1524                 if (current->signal & ~current->blocked) 
1525                 {
1526                         copied = -ERESTARTSYS;
1527                         break;
1528                 }
1529                 continue;
1530 
1531         found_ok_skb:
1532                 /* Ok so how much can we use ? */
1533                 used = skb->len - offset;
1534                 if (len < used)
1535                         used = len;
1536                 /* do we have urgent data here? */
1537                 if (sk->urg_data) 
1538                 {
1539                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1540                         if (urg_offset < used) 
1541                         {
1542                                 if (!urg_offset) 
1543                                 {
1544                                         if (!sk->urginline) 
1545                                         {
1546                                                 ++*seq;
1547                                                 offset++;
1548                                                 used--;
1549                                         }
1550                                 }
1551                                 else
1552                                         used = urg_offset;
1553                         }
1554                 }
1555                 /* Copy it */
1556                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1557                         skb->h.th->doff*4 + offset, used);
1558                 copied += used;
1559                 len -= used;
1560                 to += used;
1561                 *seq += used;
1562                 if (after(sk->copied_seq+1,sk->urg_seq))
1563                         sk->urg_data = 0;
1564                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1565                         skb->used = 1;
1566         }
1567         remove_wait_queue(sk->sleep, &wait);
1568         current->state = TASK_RUNNING;
1569 
1570         /* Clean up data we have read: This will do ACK frames */
1571         cleanup_rbuf(sk);
1572         release_sock(sk);
1573         return copied;
1574 }
1575 
1576  
1577 /*
1578  *      Shutdown the sending side of a connection.
1579  */
1580 
1581 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1582 {
1583         struct sk_buff *buff;
1584         struct tcphdr *t1, *th;
1585         struct proto *prot;
1586         int tmp;
1587         struct device *dev = NULL;
1588 
1589         /*
1590          * We need to grab some memory, and put together a FIN,
1591          * and then put it into the queue to be sent.
1592          * FIXME:
1593          *
1594          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1595          *      Most of this is guesswork, so maybe it will work...
1596          */
1597 
1598         if (!(how & SEND_SHUTDOWN)) 
1599                 return;
1600          
1601         /*
1602          *      If we've already sent a FIN, return. 
1603          */
1604          
1605         if (sk->state == TCP_FIN_WAIT1 ||
1606             sk->state == TCP_FIN_WAIT2 ||
1607             sk->state == TCP_CLOSING ||
1608             sk->state == TCP_LAST_ACK ||
1609             sk->state == TCP_TIME_WAIT
1610         ) 
1611         {
1612                 return;
1613         }
1614         sk->inuse = 1;
1615 
1616         /*
1617          * flag that the sender has shutdown
1618          */
1619 
1620         sk->shutdown |= SEND_SHUTDOWN;
1621 
1622         /*
1623          *  Clear out any half completed packets. 
1624          */
1625 
1626         if (sk->partial)
1627                 tcp_send_partial(sk);
1628 
1629         prot =(struct proto *)sk->prot;
1630         th =(struct tcphdr *)&sk->dummy_th;
1631         release_sock(sk); /* incase the malloc sleeps. */
1632         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1633         if (buff == NULL)
1634                 return;
1635         sk->inuse = 1;
1636 
1637         buff->sk = sk;
1638         buff->len = sizeof(*t1);
1639         buff->localroute = sk->localroute;
1640         t1 =(struct tcphdr *) buff->data;
1641 
1642         /*
1643          *      Put in the IP header and routing stuff. 
1644          */
1645 
1646         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1647                            IPPROTO_TCP, sk->opt,
1648                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1649         if (tmp < 0) 
1650         {
1651                 /*
1652                  *      Finish anyway, treat this as a send that got lost. 
1653                  *
1654                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1655                  *      written data to be completely acknowledged along
1656                  *      with an acknowledge to our FIN.
1657                  *
1658                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1659                  *      connection established.
1660                  */
1661                 buff->free=1;
1662                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1663 
1664                 if (sk->state == TCP_ESTABLISHED)
1665                         tcp_set_state(sk,TCP_FIN_WAIT1);
1666                 else if(sk->state == TCP_CLOSE_WAIT)
1667                         tcp_set_state(sk,TCP_LAST_ACK);
1668                 else
1669                         tcp_set_state(sk,TCP_FIN_WAIT2);
1670 
1671                 release_sock(sk);
1672                 return;
1673         }
1674 
1675         t1 =(struct tcphdr *)((char *)t1 +tmp);
1676         buff->len += tmp;
1677         buff->dev = dev;
1678         memcpy(t1, th, sizeof(*t1));
1679         t1->seq = ntohl(sk->write_seq);
1680         sk->write_seq++;
1681         buff->h.seq = sk->write_seq;
1682         t1->ack = 1;
1683         t1->ack_seq = ntohl(sk->acked_seq);
1684         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1685         t1->fin = 1;
1686         t1->rst = 0;
1687         t1->doff = sizeof(*t1)/4;
1688         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1689 
1690         /*
1691          * If there is data in the write queue, the fin must be appended to
1692          * the write queue.
1693          */
1694         
1695         if (skb_peek(&sk->write_queue) != NULL) 
1696         {
1697                 buff->free=0;
1698                 if (buff->next != NULL) 
1699                 {
1700                         printk("tcp_shutdown: next != NULL\n");
1701                         skb_unlink(buff);
1702                 }
1703                 skb_queue_tail(&sk->write_queue, buff);
1704         } 
1705         else 
1706         {
1707                 sk->sent_seq = sk->write_seq;
1708                 sk->prot->queue_xmit(sk, dev, buff, 0);
1709         }
1710 
1711         if (sk->state == TCP_ESTABLISHED) 
1712                 tcp_set_state(sk,TCP_FIN_WAIT1);
1713         else if (sk->state == TCP_CLOSE_WAIT)
1714                 tcp_set_state(sk,TCP_LAST_ACK);
1715         else
1716                 tcp_set_state(sk,TCP_FIN_WAIT2);
1717 
1718         release_sock(sk);
1719 }
1720 
1721 
1722 static int
1723 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1724              int to_len, int nonblock, unsigned flags,
1725              struct sockaddr_in *addr, int *addr_len)
1726 {
1727         int result;
1728   
1729         /* 
1730          *      Have to check these first unlike the old code. If 
1731          *      we check them after we lose data on an error
1732          *      which is wrong 
1733          */
1734 
1735         if(addr_len)
1736                 *addr_len = sizeof(*addr);
1737         result=tcp_read(sk, to, to_len, nonblock, flags);
1738 
1739         if (result < 0) 
1740                 return(result);
1741   
1742         if(addr)
1743         {
1744                 addr->sin_family = AF_INET;
1745                 addr->sin_port = sk->dummy_th.dest;
1746                 addr->sin_addr.s_addr = sk->daddr;
1747         }
1748         return(result);
1749 }
1750 
1751 
1752 /*
1753  *      This routine will send an RST to the other tcp. 
1754  */
1755  
1756 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1757           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1758 {
1759         struct sk_buff *buff;
1760         struct tcphdr *t1;
1761         int tmp;
1762         struct device *ndev=NULL;
1763   
1764 /*
1765  * We need to grab some memory, and put together an RST,
1766  * and then put it into the queue to be sent.
1767  */
1768 
1769         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1770         if (buff == NULL) 
1771                 return;
1772 
1773         buff->len = sizeof(*t1);
1774         buff->sk = NULL;
1775         buff->dev = dev;
1776         buff->localroute = 0;
1777 
1778         t1 =(struct tcphdr *) buff->data;
1779 
1780         /*
1781          *      Put in the IP header and routing stuff. 
1782          */
1783 
1784         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1785                            sizeof(struct tcphdr),tos,ttl);
1786         if (tmp < 0) 
1787         {
1788                 buff->free = 1;
1789                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1790                 return;
1791         }
1792 
1793         t1 =(struct tcphdr *)((char *)t1 +tmp);
1794         buff->len += tmp;
1795         memcpy(t1, th, sizeof(*t1));
1796 
1797         /*
1798          *      Swap the send and the receive. 
1799          */
1800 
1801         t1->dest = th->source;
1802         t1->source = th->dest;
1803         t1->rst = 1;  
1804         t1->window = 0;
1805   
1806         if(th->ack)
1807         {
1808                 t1->ack = 0;
1809                 t1->seq = th->ack_seq;
1810                 t1->ack_seq = 0;
1811         }
1812         else
1813         {
1814                 t1->ack = 1;
1815                 if(!th->syn)
1816                         t1->ack_seq=htonl(th->seq);
1817                 else
1818                         t1->ack_seq=htonl(th->seq+1);
1819                 t1->seq=0;
1820         }
1821 
1822         t1->syn = 0;
1823         t1->urg = 0;
1824         t1->fin = 0;
1825         t1->psh = 0;
1826         t1->doff = sizeof(*t1)/4;
1827         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1828         prot->queue_xmit(NULL, dev, buff, 1);
1829         tcp_statistics.TcpOutSegs++;
1830 }
1831 
1832 
1833 /*
1834  *      Look for tcp options. Parses everything but only knows about MSS.
1835  *      This routine is always called with the packet containing the SYN.
1836  *      However it may also be called with the ack to the SYN.  So you
1837  *      can't assume this is always the SYN.  It's always called after
1838  *      we have set up sk->mtu to our own MTU.
1839  */
1840  
1841 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1842 {
1843         unsigned char *ptr;
1844         int length=(th->doff*4)-sizeof(struct tcphdr);
1845         int mss_seen = 0;
1846     
1847         ptr = (unsigned char *)(th + 1);
1848   
1849         while(length>0)
1850         {
1851                 int opcode=*ptr++;
1852                 int opsize=*ptr++;
1853                 switch(opcode)
1854                 {
1855                         case TCPOPT_EOL:
1856                                 return;
1857                         case TCPOPT_NOP:
1858                                 length-=2;
1859                                 continue;
1860                         
1861                         default:
1862                                 if(opsize<=2)   /* Avoid silly options looping forever */
1863                                         return;
1864                                 switch(opcode)
1865                                 {
1866                                         case TCPOPT_MSS:
1867                                                 if(opsize==4 && th->syn)
1868                                                 {
1869                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1870                                                         mss_seen = 1;
1871                                                 }
1872                                                 break;
1873                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1874                                 }
1875                                 ptr+=opsize-2;
1876                                 length-=opsize;
1877                 }
1878         }
1879         if (th->syn) 
1880         {
1881                 if (! mss_seen)
1882                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1883         }
1884 #ifdef CONFIG_INET_PCTCP
1885         sk->mss = min(sk->max_window >> 1, sk->mtu);
1886 #else    
1887         sk->mss = min(sk->max_window, sk->mtu);
1888 #endif  
1889 }
1890 
/*
 *      Return the classful network mask for 'dst'.  The address is
 *      taken and the mask returned in network byte order.  Anything
 *      that is not class A or class B gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
        unsigned long host_addr = ntohl(dst);
        unsigned long net_mask;

        if (IN_CLASSA(host_addr))
                net_mask = IN_CLASSA_NET;
        else if (IN_CLASSB(host_addr))
                net_mask = IN_CLASSB_NET;
        else
                net_mask = IN_CLASSC_NET;

        return htonl(net_mask);
}
1900 
1901 /*
1902  *      This routine handles a connection request.
1903  *      It should make sure we haven't already responded.
1904  *      Because of the way BSD works, we have to send a syn/ack now.
1905  *      This also means it will be harder to close a socket which is
1906  *      listening.
1907  */
1908  
1909 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1910                  unsigned long daddr, unsigned long saddr,
1911                  struct options *opt, struct device *dev)
1912 {
1913         struct sk_buff *buff;
1914         struct tcphdr *t1;
1915         unsigned char *ptr;
1916         struct sock *newsk;
1917         struct tcphdr *th;
1918         struct device *ndev=NULL;
1919         int tmp;
1920         struct rtable *rt;
1921   
1922         th = skb->h.th;
1923 
1924         /* If the socket is dead, don't accept the connection. */
1925         if (!sk->dead) 
1926         {
1927                 sk->data_ready(sk,0);
1928         }
1929         else 
1930         {
1931                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1932                 tcp_statistics.TcpAttemptFails++;
1933                 kfree_skb(skb, FREE_READ);
1934                 return;
1935         }
1936 
1937         /*
1938          * Make sure we can accept more.  This will prevent a
1939          * flurry of syns from eating up all our memory.
1940          */
1941 
1942         if (sk->ack_backlog >= sk->max_ack_backlog) 
1943         {
1944                 tcp_statistics.TcpAttemptFails++;
1945                 kfree_skb(skb, FREE_READ);
1946                 return;
1947         }
1948 
1949         /*
1950          * We need to build a new sock struct.
1951          * It is sort of bad to have a socket without an inode attached
1952          * to it, but the wake_up's will just wake up the listening socket,
1953          * and if the listening socket is destroyed before this is taken
1954          * off of the queue, this will take care of it.
1955          */
1956 
1957         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1958         if (newsk == NULL) 
1959         {
1960                 /* just ignore the syn.  It will get retransmitted. */
1961                 tcp_statistics.TcpAttemptFails++;
1962                 kfree_skb(skb, FREE_READ);
1963                 return;
1964         }
1965 
1966         memcpy(newsk, sk, sizeof(*newsk));
1967         skb_queue_head_init(&newsk->write_queue);
1968         skb_queue_head_init(&newsk->receive_queue);
1969         newsk->send_head = NULL;
1970         newsk->send_tail = NULL;
1971         skb_queue_head_init(&newsk->back_log);
1972         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
1973         newsk->rto = TCP_TIMEOUT_INIT;
1974         newsk->mdev = 0;
1975         newsk->max_window = 0;
1976         newsk->cong_window = 1;
1977         newsk->cong_count = 0;
1978         newsk->ssthresh = 0;
1979         newsk->backoff = 0;
1980         newsk->blog = 0;
1981         newsk->intr = 0;
1982         newsk->proc = 0;
1983         newsk->done = 0;
1984         newsk->partial = NULL;
1985         newsk->pair = NULL;
1986         newsk->wmem_alloc = 0;
1987         newsk->rmem_alloc = 0;
1988         newsk->localroute = sk->localroute;
1989 
1990         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1991 
1992         newsk->err = 0;
1993         newsk->shutdown = 0;
1994         newsk->ack_backlog = 0;
1995         newsk->acked_seq = skb->h.th->seq+1;
1996         newsk->fin_seq = skb->h.th->seq;
1997         newsk->copied_seq = skb->h.th->seq;
1998         newsk->state = TCP_SYN_RECV;
1999         newsk->timeout = 0;
2000         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
2001         newsk->window_seq = newsk->write_seq;
2002         newsk->rcv_ack_seq = newsk->write_seq;
2003         newsk->urg_data = 0;
2004         newsk->retransmits = 0;
2005         newsk->destroy = 0;
2006         init_timer(&newsk->timer);
2007         newsk->timer.data = (unsigned long)newsk;
2008         newsk->timer.function = &net_timer;
2009         newsk->dummy_th.source = skb->h.th->dest;
2010         newsk->dummy_th.dest = skb->h.th->source;
2011         
2012         /*
2013          *      Swap these two, they are from our point of view. 
2014          */
2015          
2016         newsk->daddr = saddr;
2017         newsk->saddr = daddr;
2018 
2019         put_sock(newsk->num,newsk);
2020         newsk->dummy_th.res1 = 0;
2021         newsk->dummy_th.doff = 6;
2022         newsk->dummy_th.fin = 0;
2023         newsk->dummy_th.syn = 0;
2024         newsk->dummy_th.rst = 0;        
2025         newsk->dummy_th.psh = 0;
2026         newsk->dummy_th.ack = 0;
2027         newsk->dummy_th.urg = 0;
2028         newsk->dummy_th.res2 = 0;
2029         newsk->acked_seq = skb->h.th->seq + 1;
2030         newsk->copied_seq = skb->h.th->seq;
2031 
2032         /*
2033          *      Grab the ttl and tos values and use them 
2034          */
2035 
2036         newsk->ip_ttl=sk->ip_ttl;
2037         newsk->ip_tos=skb->ip_hdr->tos;
2038 
2039         /*
2040          *      Use 512 or whatever user asked for 
2041          */
2042 
2043         /*
2044          *      Note use of sk->user_mss, since user has no direct access to newsk 
2045          */
2046 
2047         rt=ip_rt_route(saddr, NULL,NULL);
2048         
2049         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2050                 newsk->window_clamp = rt->rt_window;
2051         else
2052                 newsk->window_clamp = 0;
2053                 
2054         if (sk->user_mss)
2055                 newsk->mtu = sk->user_mss;
2056         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2057                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2058         else 
2059         {
2060 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2061                 if ((saddr ^ daddr) & default_mask(saddr))
2062 #else
2063                 if ((saddr ^ daddr) & dev->pa_mask)
2064 #endif
2065                         newsk->mtu = 576 - HEADER_SIZE;
2066                 else
2067                         newsk->mtu = MAX_WINDOW;
2068         }
2069 
2070         /*
2071          *      But not bigger than device MTU 
2072          */
2073 
2074         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2075 
2076         /*
2077          *      This will min with what arrived in the packet 
2078          */
2079 
2080         tcp_options(newsk,skb->h.th);
2081 
2082         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2083         if (buff == NULL) 
2084         {
2085                 sk->err = -ENOMEM;
2086                 newsk->dead = 1;
2087                 release_sock(newsk);
2088                 kfree_skb(skb, FREE_READ);
2089                 tcp_statistics.TcpAttemptFails++;
2090                 return;
2091         }
2092   
2093         buff->len = sizeof(struct tcphdr)+4;
2094         buff->sk = newsk;
2095         buff->localroute = newsk->localroute;
2096 
2097         t1 =(struct tcphdr *) buff->data;
2098 
2099         /*
2100          *      Put in the IP header and routing stuff. 
2101          */
2102 
2103         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2104                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2105 
2106         /*
2107          *      Something went wrong. 
2108          */
2109 
2110         if (tmp < 0) 
2111         {
2112                 sk->err = tmp;
2113                 buff->free=1;
2114                 kfree_skb(buff,FREE_WRITE);
2115                 newsk->dead = 1;
2116                 release_sock(newsk);
2117                 skb->sk = sk;
2118                 kfree_skb(skb, FREE_READ);
2119                 tcp_statistics.TcpAttemptFails++;
2120                 return;
2121         }
2122 
2123         buff->len += tmp;
2124         t1 =(struct tcphdr *)((char *)t1 +tmp);
2125   
2126         memcpy(t1, skb->h.th, sizeof(*t1));
2127         buff->h.seq = newsk->write_seq;
2128         /*
2129          *      Swap the send and the receive. 
2130          */
2131         t1->dest = skb->h.th->source;
2132         t1->source = newsk->dummy_th.source;
2133         t1->seq = ntohl(newsk->write_seq++);
2134         t1->ack = 1;
2135         newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2136         newsk->sent_seq = newsk->write_seq;
2137         t1->window = ntohs(newsk->window);
2138         t1->res1 = 0;
2139         t1->res2 = 0;
2140         t1->rst = 0;
2141         t1->urg = 0;
2142         t1->psh = 0;
2143         t1->syn = 1;
2144         t1->ack_seq = ntohl(skb->h.th->seq+1);
2145         t1->doff = sizeof(*t1)/4+1;
2146         ptr =(unsigned char *)(t1+1);
2147         ptr[0] = 2;
2148         ptr[1] = 4;
2149         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2150         ptr[3] =(newsk->mtu) & 0xff;
2151 
2152         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2153         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2154 
2155         reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
2156         skb->sk = newsk;
2157 
2158         /*
2159          *      Charge the sock_buff to newsk. 
2160          */
2161          
2162         sk->rmem_alloc -= skb->mem_len;
2163         newsk->rmem_alloc += skb->mem_len;
2164         
2165         skb_queue_tail(&sk->receive_queue,skb);
2166         sk->ack_backlog++;
2167         release_sock(newsk);
2168         tcp_statistics.TcpOutSegs++;
2169 }
2170 
2171 
2172 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2173 {
2174         struct sk_buff *buff;
2175         int need_reset = 0;
2176         struct tcphdr *t1, *th;
2177         struct proto *prot;
2178         struct device *dev=NULL;
2179         int tmp;
2180 
2181         /*
2182          * We need to grab some memory, and put together a FIN, 
2183          * and then put it into the queue to be sent.
2184          */
2185         sk->inuse = 1;
2186         sk->keepopen = 1;
2187         sk->shutdown = SHUTDOWN_MASK;
2188 
2189         if (!sk->dead) 
2190                 sk->state_change(sk);
2191 
2192         if (timeout == 0) 
2193         {
2194                 /*
2195                  *  We need to flush the recv. buffs.  We do this only on the
2196                  *  descriptor close, not protocol-sourced closes, because the
2197                  *  reader process may not have drained the data yet!
2198                  */
2199 
2200                 if (skb_peek(&sk->receive_queue) != NULL) 
2201                 {
2202                         struct sk_buff *skb;
2203                         if(sk->debug)
2204                                 printk("Clean rcv queue\n");
2205                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2206                         {
2207                                 /* The +1 is not needed because the FIN takes up seq
2208                                    is not read!!! */
2209                                 if(skb->len > 0 && after(skb->h.th->seq + skb->len , sk->copied_seq))
2210                                         need_reset = 1;
2211                                 kfree_skb(skb, FREE_READ);
2212                         }
2213                         if(sk->debug)
2214                                 printk("Cleaned.\n");
2215                 }
2216         }
2217 
2218         /*
2219          *      Get rid off any half-completed packets. 
2220          */
2221          
2222         if (sk->partial) 
2223         {
2224                 tcp_send_partial(sk);
2225         }
2226 
2227         switch(sk->state) 
2228         {
2229                 case TCP_FIN_WAIT1:
2230                 case TCP_FIN_WAIT2:
2231                 case TCP_CLOSING:
2232                         /*
2233                          * These states occur when we have already closed out
2234                          * our end.  If there is no timeout, we do not do
2235                          * anything.  We may still be in the middle of sending
2236                          * the remainder of our buffer, for example...
2237                          * resetting the timer would be inappropriate.
2238                          *
2239                          * XXX if retransmit count reaches limit, is tcp_close()
2240                          * called with timeout == 1 ? if not, we need to fix that.
2241                          */
2242                         if (!timeout) {
2243                                 int timer_active;
2244 
2245                                 timer_active = del_timer(&sk->timer);
2246                                 if (timer_active)
2247                                         add_timer(&sk->timer);
2248                                 else
2249                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2250                         }
2251 #ifdef NOTDEF
2252                         /* 
2253                          *      Start a timer.
2254                          * original code was 4 * sk->rtt.  In converting to the
2255                          * new rtt representation, we can't quite use that.
2256                          * it seems to make most sense to  use the backed off value
2257                          */
2258                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2259 #endif
2260                         if (timeout) 
2261                                 tcp_time_wait(sk);
2262                         release_sock(sk);
2263                         return; /* break causes a double release - messy */
2264                 case TCP_TIME_WAIT:
2265                 case TCP_LAST_ACK:
2266                         /*
2267                          * A timeout from these states terminates the TCB.
2268                          */
2269                         if (timeout) 
2270                         {
2271                                 tcp_set_state(sk,TCP_CLOSE);
2272                         }
2273                         release_sock(sk);
2274                         return;
2275                 case TCP_LISTEN:
2276                         tcp_set_state(sk,TCP_CLOSE);
2277                         release_sock(sk);
2278                         return;
2279                 case TCP_CLOSE:
2280                         release_sock(sk);
2281                         return;
2282                 case TCP_CLOSE_WAIT:
2283                 case TCP_ESTABLISHED:
2284                 case TCP_SYN_SENT:
2285                 case TCP_SYN_RECV:
2286                         prot =(struct proto *)sk->prot;
2287                         th =(struct tcphdr *)&sk->dummy_th;
2288                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2289                         if (buff == NULL) 
2290                         {
2291                                 /* This will force it to try again later. */
2292                                 /* Or it would have if someone released the socket
2293                                    first. Anyway it might work now */
2294                                 release_sock(sk);
2295                                 if (sk->state != TCP_CLOSE_WAIT)
2296                                         tcp_set_state(sk,TCP_ESTABLISHED);
2297                                 reset_timer(sk, TIME_CLOSE, 100);
2298                                 return;
2299                         }
2300                         buff->sk = sk;
2301                         buff->free = 1;
2302                         buff->len = sizeof(*t1);
2303                         buff->localroute = sk->localroute;
2304                         t1 =(struct tcphdr *) buff->data;
2305         
2306                         /*
2307                          *      Put in the IP header and routing stuff. 
2308                          */
2309                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2310                                          IPPROTO_TCP, sk->opt,
2311                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2312                         if (tmp < 0) 
2313                         {
2314                                 sk->write_seq++;        /* Very important 8) */
2315                                 kfree_skb(buff,FREE_WRITE);
2316 
2317                                 /*
2318                                  * Enter FIN_WAIT1 to await completion of
2319                                  * written out data and ACK to our FIN.
2320                                  */
2321 
2322                                 if(sk->state==TCP_ESTABLISHED)
2323                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2324                                 else
2325                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2326                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2327                                 if(timeout)
2328                                         tcp_time_wait(sk);
2329 
2330                                 release_sock(sk);
2331                                 return;
2332                         }
2333 
2334                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2335                         buff->len += tmp;
2336                         buff->dev = dev;
2337                         memcpy(t1, th, sizeof(*t1));
2338                         t1->seq = ntohl(sk->write_seq);
2339                         sk->write_seq++;
2340                         buff->h.seq = sk->write_seq;
2341                         t1->ack = 1;
2342         
2343                         /* 
2344                          *      Ack everything immediately from now on. 
2345                          */
2346 
2347                         sk->delay_acks = 0;
2348                         t1->ack_seq = ntohl(sk->acked_seq);
2349                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2350                         t1->fin = 1;
2351                         t1->rst = need_reset;
2352                         t1->doff = sizeof(*t1)/4;
2353                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2354 
2355                         tcp_statistics.TcpOutSegs++;
2356         
2357                         if (skb_peek(&sk->write_queue) == NULL) 
2358                         {
2359                                 sk->sent_seq = sk->write_seq;
2360                                 prot->queue_xmit(sk, dev, buff, 0);
2361                         } 
2362                         else 
2363                         {
2364                                 reset_timer(sk, TIME_WRITE, sk->rto);
2365                                 if (buff->next != NULL) 
2366                                 {
2367                                         printk("tcp_close: next != NULL\n");
2368                                         skb_unlink(buff);
2369                                 }
2370                                 skb_queue_tail(&sk->write_queue, buff);
2371                         }
2372 
2373                         /*
2374                          * If established (normal close), enter FIN_WAIT1.
2375                          * If in CLOSE_WAIT, enter LAST_ACK
2376                          * If in CLOSING, remain in CLOSING
2377                          * otherwise enter FIN_WAIT2
2378                          */
2379 
2380                         if (sk->state == TCP_ESTABLISHED)
2381                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2382                         else if (sk->state == TCP_CLOSE_WAIT)
2383                                 tcp_set_state(sk,TCP_LAST_ACK);
2384                         else if (sk->state != TCP_CLOSING)
2385                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2386         }
2387         release_sock(sk);
2388 }
2389 
2390 
2391 /*
2392  * This routine takes stuff off of the write queue,
2393  * and puts it in the xmit queue.
2394  */
2395 static void
2396 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2397 {
2398         struct sk_buff *skb;
2399 
2400         /*
2401          *      The bytes will have to remain here. In time closedown will
2402          *      empty the write queue and all will be happy 
2403          */
2404 
2405         if(sk->zapped)
2406                 return;
2407 
2408         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2409                 before(skb->h.seq, sk->window_seq + 1) &&
2410                 (sk->retransmits == 0 ||
2411                  sk->timeout != TIME_WRITE ||
2412                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2413                 && sk->packets_out < sk->cong_window) 
2414         {
2415                 IS_SKB(skb);
2416                 skb_unlink(skb);
2417                 /* See if we really need to send the packet. */
2418                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2419                 {
2420                         sk->retransmits = 0;
2421                         kfree_skb(skb, FREE_WRITE);
2422                         if (!sk->dead) 
2423                                 sk->write_space(sk);
2424                 } 
2425                 else
2426                 {
2427                         struct tcphdr *th;
2428                         struct iphdr *iph;
2429                         int size;
2430 /*
2431  * put in the ack seq and window at this point rather than earlier,
2432  * in order to keep them monotonic.  We really want to avoid taking
2433  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2434  * Ack and window will in general have changed since this packet was put
2435  * on the write queue.
2436  */
2437                         iph = (struct iphdr *)(skb->data +
2438                                                skb->dev->hard_header_len);
2439                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2440                         size = skb->len - (((unsigned char *) th) - skb->data);
2441                         
2442                         th->ack_seq = ntohl(sk->acked_seq);
2443                         th->window = ntohs(tcp_select_window(sk));
2444 
2445                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2446 
2447                         sk->sent_seq = skb->h.seq;
2448                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2449                 }
2450         }
2451 }
2452 
2453 
2454 /*
2455  *      This routine sorts the send list, and resets the
2456  *      sk->send_head and sk->send_tail pointers.
2457  */
2458 
2459 static void sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2460 {
2461         struct sk_buff *list = NULL;
2462         struct sk_buff *skb,*skb2,*skb3;
2463 
2464         for (skb = sk->send_head; skb != NULL; skb = skb2) 
2465         {
2466                 skb2 = skb->link3;
2467                 if (list == NULL || before (skb2->h.seq, list->h.seq)) 
2468                 {
2469                         skb->link3 = list;
2470                         sk->send_tail = skb;
2471                         list = skb;
2472                 }
2473                 else
2474                 {
2475                         for (skb3 = list; ; skb3 = skb3->link3) 
2476                         {
2477                                 if (skb3->link3 == NULL ||
2478                                     before(skb->h.seq, skb3->link3->h.seq))
2479                                 {
2480                                         skb->link3 = skb3->link3;
2481                                         skb3->link3 = skb;
2482                                         if (skb->link3 == NULL) 
2483                                                 sk->send_tail = skb;
2484                                         break;
2485                                 }
2486                         }
2487                 }
2488         }
2489         sk->send_head = list;
2490 }
2491   
2492 
2493 /*
2494  *      This routine deals with incoming acks, but not outgoing ones.
      *
      *      sk    - socket the acknowledgement applies to; its window edge,
      *              retransmit list, write queue, timers and connection state
      *              are all updated here.
      *      th    - header of the incoming segment; ack_seq, window and doff
      *              are read from it.
      *      saddr - not referenced anywhere in this function.
      *      len   - compared against th->doff*4; a mismatch sets flag bit 1
      *              (presumably the segment length including the header -
      *              confirm at the caller).
      *
      *      Returns 1 immediately if the socket is zapped.  Returns 0 when the
      *      ack is rejected: either it is after sk->sent_seq, or it is before
      *      sk->rcv_ack_seq and the socket is neither ESTABLISHED nor
      *      CLOSE_WAIT.  An old ack on an ESTABLISHED or CLOSE_WAIT socket
      *      returns 1 after optionally restarting the keepalive timer.  Every
      *      ack that is processed in full returns 1.
2495  */
2496 
2497 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2498 {
2499         unsigned long ack;
2500         int flag = 0;
2501 
2502         /* 
2503          * 1 - there was data in packet as well as ack or new data is sent or 
2504          *     in shutdown state
2505          * 2 - data from retransmit queue was acked and removed
2506          * 4 - window shrunk or data from retransmit queue was acked and removed
2507          */
2508 
2509         if(sk->zapped)
2510                 return(1);      /* Dead, cant ack any more so why bother */
2511 
2512         ack = ntohl(th->ack_seq);
             /* Track the largest window seen in an incoming header and derive mss from it. */
2513         if (ntohs(th->window) > sk->max_window) 
2514         {
2515                 sk->max_window = ntohs(th->window);
2516 #ifdef CONFIG_INET_PCTCP
2517                 sk->mss = min(sk->max_window>>1, sk->mtu);
2518 #else
2519                 sk->mss = min(sk->max_window, sk->mtu);
2520 #endif  
2521         }
2522 
             /* An ack arriving while sk->timeout is TIME_KEEPOPEN clears the retransmit count. */
2523         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2524                 sk->retransmits = 0;
2525 
             /* The live test is the #else arm: acks outside rcv_ack_seq..sent_seq are not processed further. */
2526 #if 0
2527 /*
2528  *      Not quite clear why the +1 and -1 here, and why not +1 in next line 
2529  */
2530  
2531         if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) 
2532 #else   
2533         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2534 #endif  
2535         {
2536                 if(sk->debug)
2537                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2538                         
2539                 /*
2540                  *      Keepalive processing.
2541                  */
2542                  
2543                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2544                 {
2545                         return(0);
2546                 }
2547                 if (sk->keepopen) 
2548                 {
2549                         if(sk->timeout==TIME_KEEPOPEN)
2550                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2551                 }
2552                 return(1);
2553         }
2554 
2555         if (len != th->doff*4) 
2556                 flag |= 1;
2557 
2558         /* See if our window has been shrunk. */
2559 
2560         if (after(sk->window_seq, ack+ntohs(th->window))) 
2561         {
2562                 /*
2563                  * We may need to move packets from the send queue
2564                  * to the write queue, if the window has been shrunk on us.
2565                  * The RFC says you are not allowed to shrink your window
2566                  * like this, but if the other end does, you must be able
2567                  * to deal with it.
2568                  */
2569                 struct sk_buff *skb;
2570                 struct sk_buff *skb2;
2571                 struct sk_buff *wskb = NULL;
2572         
2573                 skb2 = sk->send_head;
2574                 sk->send_head = NULL;
2575                 sk->send_tail = NULL;
2576         
2577                 flag |= 4;
2578         
2579                 sk->window_seq = ack + ntohs(th->window);
2580                 cli();
2581                 while (skb2 != NULL) 
2582                 {
2583                         skb = skb2;
2584                         skb2 = skb->link3;
2585                         skb->link3 = NULL;
2586                         if (after(skb->h.seq, sk->window_seq)) 
2587                         {
2588                                 if (sk->packets_out > 0) 
2589                                         sk->packets_out--;
2590                                 /* We may need to remove this from the dev send list. */
2591                                 if (skb->next != NULL) 
2592                                 {
2593                                         skb_unlink(skb);                                
2594                                 }
2595                                 /* Now add it to the write_queue. */
2596                                 if (wskb == NULL)
2597                                         skb_queue_head(&sk->write_queue,skb);
2598                                 else
2599                                         skb_append(wskb,skb);
2600                                 wskb = skb;
2601                         } 
2602                         else 
2603                         {
2604                                 if (sk->send_head == NULL) 
2605                                 {
2606                                         sk->send_head = skb;
2607                                         sk->send_tail = skb;
2608                                 }
2609                                 else
2610                                 {
2611                                         sk->send_tail->link3 = skb;
2612                                         sk->send_tail = skb;
2613                                 }
2614                                 skb->link3 = NULL;
2615                         }
2616                 }
2617                 sti();
2618         }
2619 
             /* If either end of the retransmit list is missing, treat the whole list as empty. */
2620         if (sk->send_tail == NULL || sk->send_head == NULL) 
2621         {
2622                 sk->send_head = NULL;
2623                 sk->send_tail = NULL;
2624                 sk->packets_out= 0;
2625         }
2626 
             /* Adopt the window edge from this ack unconditionally (the shrink branch above already did so too). */
2627         sk->window_seq = ack + ntohs(th->window);
2628 
2629         /* We don't want too many packets out there. */
2630         if (sk->timeout == TIME_WRITE && 
2631                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2632         {
2633 /* 
2634  * This is Jacobson's slow start and congestion avoidance. 
2635  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2636  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2637  * counter and increment it once every cwnd times.  It's possible
2638  * that this should be done only if sk->retransmits == 0.  I'm
2639  * interpreting "new data is acked" as including data that has
2640  * been retransmitted but is just now being acked.
2641  */
2642                 if (sk->cong_window < sk->ssthresh)  
2643                   /* 
2644                    *    In "safe" area, increase
2645                    */
2646                         sk->cong_window++;
2647                 else 
2648                 {
2649                   /*
2650                    *    In dangerous area, increase slowly.  In theory this is
2651                    *    sk->cong_window += 1 / sk->cong_window
2652                    */
2653                         if (sk->cong_count >= sk->cong_window) 
2654                         {
2655                                 sk->cong_window++;
2656                                 sk->cong_count = 0;
2657                         }
2658                         else 
2659                                 sk->cong_count++;
2660                 }
2661         }
2662 
             /* Record the ack; every test above that read rcv_ack_seq saw the previous value. */
2663         sk->rcv_ack_seq = ack;
2664 
2665         /*
2666          * if this ack opens up a zero window, clear backoff.  It was
2667          * being used to time the probes, and is probably far higher than
2668          * it needs to be for normal retransmission.
2669          */
2670 
2671         if (sk->timeout == TIME_PROBE0) 
2672         {
2673                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2674                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2675                 {
2676                         sk->retransmits = 0;
2677                         sk->backoff = 0;
2678                   /*
2679                    *    Recompute rto from rtt.  this eliminates any backoff.
2680                    */
2681 
2682                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2683                         if (sk->rto > 120*HZ)
2684                                 sk->rto = 120*HZ;
2685                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2686                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2687                                                    .2 of a second is going to need huge windows (SIGH) */
2688                                 sk->rto = 20;
2689                 }
2690         }
2691 
2692   /* 
2693    *    See if we can take anything off of the retransmit queue.
2694    */
2695    
2696         while(sk->send_head != NULL) 
2697         {
2698                 /* Check for a bug. */
2699                 if (sk->send_head->link3 &&
2700                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2701                 {
2702                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2703                         sort_send(sk);
2704                 }
2705 
                     /* The head buffer's h.seq is not after ack, so it is released; otherwise the loop stops. */
2706                 if (before(sk->send_head->h.seq, ack+1)) 
2707                 {
2708                         struct sk_buff *oskb;   
2709                         if (sk->retransmits) 
2710                         {       
2711                                 /*
2712                                  *      We were retransmitting.  don't count this in RTT est 
2713                                  */
2714                                 flag |= 2;
2715 
2716                                 /*
2717                                  * even though we've gotten an ack, we're still
2718                                  * retransmitting as long as we're sending from
2719                                  * the retransmit queue.  Keeping retransmits non-zero
2720                                  * prevents us from getting new data interspersed with
2721                                  * retransmissions.
2722                                  */
2723 
2724                                 if (sk->send_head->link3)
2725                                         sk->retransmits = 1;
2726                                 else
2727                                         sk->retransmits = 0;
2728                         }
2729                         /*
2730                          * Note that we only reset backoff and rto in the
2731                          * rtt recomputation code.  And that doesn't happen
2732                          * if there were retransmissions in effect.  So the
2733                          * first new packet after the retransmissions is
2734                          * sent with the backoff still in effect.  Not until
2735                          * we get an ack from a non-retransmitted packet do
2736                          * we reset the backoff and rto.  This allows us to deal
2737                          * with a situation where the network delay has increased
2738                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2739                          */
2740 
2741                         /*
2742                          *      We have one less packet out there. 
2743                          */
2744                          
2745                         if (sk->packets_out > 0) 
2746                                 sk->packets_out --;
2747                         /* 
2748                          *      Wake up the process, it can probably write more. 
2749                          */
2750                         if (!sk->dead) 
2751                                 sk->write_space(sk);
2752                         oskb = sk->send_head;
2753 
                             /*
                              * Bit 2 is set further down once the first buffer has been
                              * released, so at most one RTT sample is taken per call, and
                              * none at all if sk->retransmits was non-zero for that buffer.
                              */
2754                         if (!(flag&2)) 
2755                         {
2756                                 long m;
2757         
2758                                 /*
2759                                  *      The following amusing code comes from Jacobson's
2760                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2761                                  *      are scaled versions of rtt and mean deviation.
2762                                  *      This is designed to be as fast as possible 
2763                                  *      m stands for "measurement".
2764                                  */
2765         
2766                                 m = jiffies - oskb->when;  /* RTT */
2767                                 if(m<=0)
2768                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2769                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2770                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2771                                 if (m < 0)
2772                                         m = -m;         /* m is now abs(error) */
2773                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2774                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2775         
2776                                 /*
2777                                  *      Now update timeout.  Note that this removes any backoff.
2778                                  */
2779                          
2780                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2781                                 if (sk->rto > 120*HZ)
2782                                         sk->rto = 120*HZ;
2783                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2784                                         sk->rto = 20;
2785                                 sk->backoff = 0;
2786                         }
2787                         flag |= (2|4);
                             /* The list head is re-read and unlinked inside the cli()/sti() section. */
2788                         cli();
2789                         oskb = sk->send_head;
2790                         IS_SKB(oskb);
2791                         sk->send_head = oskb->link3;
2792                         if (sk->send_head == NULL) 
2793                         {
2794                                 sk->send_tail = NULL;
2795                         }
2796 
2797                 /*
2798                  *      We may need to remove this from the dev send list. 
2799                  */
2800 
2801                         if (oskb->next)
2802                                 skb_unlink(oskb);
2803                         sti();
2804                         kfree_skb(oskb, FREE_WRITE); /* write. */
2805                         if (!sk->dead) 
2806                                 sk->write_space(sk);
2807                 }
2808                 else
2809                 {
2810                         break;
2811                 }
2812         }
2813 
2814         /*
2815          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2816          * returns non-NULL, we completely ignore the timer stuff in the else
2817          * clause.  We ought to organize the code so that else clause can
2818          * (should) be executed regardless, possibly moving the PROBE timer
2819          * reset over.  The skb_peek() thing should only move stuff to the
2820          * write queue, NOT also manage the timer functions.
2821          */
2822 
2823         /*
2824          * Maybe we can take some stuff off of the write queue,
2825          * and put it onto the xmit queue.
2826          */
2827         if (skb_peek(&sk->write_queue) != NULL) 
2828         {
2829                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2830                         (sk->retransmits == 0 || 
2831                          sk->timeout != TIME_WRITE ||
2832                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2833                         && sk->packets_out < sk->cong_window) 
2834                 {
2835                         flag |= 1;
2836                         tcp_write_xmit(sk);
2837                 }
2838                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2839                         sk->send_head == NULL &&
2840                         sk->ack_backlog == 0 &&
2841                         sk->state != TCP_TIME_WAIT) 
2842                 {
2843                         reset_timer(sk, TIME_PROBE0, sk->rto);
2844                 }               
2845         }
2846         else
2847         {
2848                 /*
2849                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2850                  * from TCP_CLOSE we don't do anything
2851                  *
2852                  * from anything else, if there is write data (or fin) pending,
2853                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2854                  * a KEEPALIVE timeout, else we delete the timer.
2855                  *
2856                  * We do not set flag for nominal write data, otherwise we may
2857                  * force a state where we start to write itsy bitsy tidbits
2858                  * of data.
2859                  */
2860 
2861                 switch(sk->state) {
2862                 case TCP_TIME_WAIT:
2863                         /*
2864                          * keep us in TIME_WAIT until we stop getting packets,
2865                          * reset the timeout.
2866                          */
2867                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2868                         break;
2869                 case TCP_CLOSE:
2870                         /*
2871                          * don't touch the timer.
2872                          */
2873                         break;
2874                 default:
2875                         /*
2876                          * must check send_head, write_queue, and ack_backlog
2877                          * to determine which timeout to use.
2878                          */
2879                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2880                                 reset_timer(sk, TIME_WRITE, sk->rto);
2881                         } else if (sk->keepopen) {
2882                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2883                         } else {
2884                                 delete_timer(sk);
2885                         }
2886                         break;
2887                 }
2888 #ifdef NOTDEF
2889                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2890                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2891                 {
2892                         if (!sk->dead)
2893                                 sk->write_space(sk);
2894                         if (sk->keepopen) {
2895                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2896                         } else {
2897                                 delete_timer(sk);
2898                         }
2899                 }
2900                 else
2901                 {
2902                         if (sk->state != (unsigned char) sk->keepopen) 
2903                         {
2904                                 reset_timer(sk, TIME_WRITE, sk->rto);
2905                         }
2906                         if (sk->state == TCP_TIME_WAIT) 
2907                         {
2908                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2909                         }       
2910                 }
2911 #endif
2912         }
2913 
             /* Nothing outstanding and nothing queued: hand sk->partial to tcp_send_partial(). */
2914         if (sk->packets_out == 0 && sk->partial != NULL &&
2915                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2916         {
2917                 flag |= 1;
2918                 tcp_send_partial(sk);
2919         }
2920 
2921         /*
2922          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2923          * we are now waiting for an acknowledge to our FIN.  The other end is
2924          * already in TIME_WAIT.
2925          *
2926          * Move to TCP_CLOSE on success.
2927          */
2928 
2929         if (sk->state == TCP_LAST_ACK) 
2930         {
2931                 if (!sk->dead)
2932                         sk->state_change(sk);
2933                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2934                 {
2935                         flag |= 1;
2936                         tcp_set_state(sk,TCP_CLOSE);
2937                         sk->shutdown = SHUTDOWN_MASK;
2938                 }
2939         }
2940 
2941         /*
2942          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2943          *
2944          * Move to FIN_WAIT2 to await a FIN from the other end.
2945          */
2946 
2947         if (sk->state == TCP_FIN_WAIT1) 
2948         {
2949 
2950                 if (!sk->dead) 
2951                         sk->state_change(sk);
2952                 if (sk->rcv_ack_seq == sk->write_seq) 
2953                 {
2954                         flag |= 1;
2955                         if (sk->acked_seq != sk->fin_seq) 
2956                         {
2957                                 tcp_time_wait(sk);
2958                         }
2959                         else
2960                         {
2961                                 sk->shutdown = SHUTDOWN_MASK;
2962                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2963                         }
2964                 }
2965         }
2966 
2967         /*
2968          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2969          *
2970          *      Move to TIME_WAIT
2971          */
2972 
2973         if (sk->state == TCP_CLOSING) 
2974         {
2975 
2976                 if (!sk->dead) 
2977                         sk->state_change(sk);
2978                 if (sk->rcv_ack_seq == sk->write_seq) 
2979                 {
2980                         flag |= 1;
2981                         tcp_time_wait(sk);
2982                 }
2983         }
2984 
2985         /*
2986          * I make no guarantees about the first clause in the following
2987          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2988          * what conditions "!flag" would be true.  However I think the rest
2989          * of the conditions would prevent that from causing any
2990          * unnecessary retransmission. 
2991          *   Clearly if the first packet has expired it should be 
2992          * retransmitted.  The other alternative, "flag&2 && retransmits", is
2993          * harder to explain:  You have to look carefully at how and when the
2994          * timer is set and with what timeout.  The most recent transmission always
2995          * sets the timer.  So in general if the most recent thing has timed
2996          * out, everything before it has as well.  So we want to go ahead and
2997          * retransmit some more.  If we didn't explicitly test for this
2998          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2999          * would not be true.  If you look at the pattern of timing, you can
3000          * show that rto is increased fast enough that the next packet would
3001          * almost never be retransmitted immediately.  Then you'd end up
3002          * waiting for a timeout to send each packet on the retransmission
3003          * queue.  With my implementation of the Karn sampling algorithm,
3004          * the timeout would double each time.  The net result is that it would
3005          * take a hideous amount of time to recover from a single dropped packet.
3006          * It's possible that there should also be a test for TIME_WRITE, but
3007          * I think as long as "send_head != NULL" and "retransmit" is on, we've
3008          * got to be in real retransmission mode.
3009          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
3010          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
3011          * As long as no further losses occur, this seems reasonable.
3012          */
3013         
3014         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3015                (((flag&2) && sk->retransmits) ||
3016                (sk->send_head->when + sk->rto < jiffies))) 
3017         {
3018                 ip_do_retransmit(sk, 1);
3019                 reset_timer(sk, TIME_WRITE, sk->rto);
3020         }
3021 
3022         return(1);
3023 }
3024 
3025 
3026 /*
3027  *      This routine handles the data.  If there is room in the buffer,
3028  *      it will have already been moved into it.  If there is no
3029  *      room, then we will just have to discard the packet.
3030  */
3031 
3032 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
3033          unsigned long saddr, unsigned short len)
3034 {
3035         struct sk_buff *skb1, *skb2;
3036         struct tcphdr *th;
3037         int dup_dumped=0;
3038         unsigned long new_seq;
3039 
3040         th = skb->h.th;
3041         skb->len = len -(th->doff*4);
3042 
3043         /* The bytes in the receive read/assembly queue has increased. Needed for the
3044            low memory discard algorithm */
3045            
3046         sk->bytes_rcv += skb->len;
3047         
3048         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3049         {
3050                 /* 
3051                  *      Don't want to keep passing ack's back and forth. 
3052                  *      (someone sent us dataless, boring frame)
3053                  */
3054                 if (!th->ack)
3055                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3056                 kfree_skb(skb, FREE_READ);
3057                 return(0);
3058         }
3059         
3060         /*
3061          *      We no longer have anyone receiving data on this connection.
3062          */
3063 
3064         if(sk->shutdown & RCV_SHUTDOWN)
3065         {
3066                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3067                 
3068                 if(after(new_seq,sk->copied_seq+1))     /* If the right edge of this frame is after the last copied byte
3069                                                            then it contains data we will never touch. We send an RST to 
3070                                                            ensure the far end knows it never got to the application */
3071                 {
3072                         sk->acked_seq = new_seq + th->fin;
3073                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3074                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3075                         tcp_statistics.TcpEstabResets++;
3076                         tcp_set_state(sk,TCP_CLOSE);
3077                         sk->err = EPIPE;
3078                         sk->shutdown = SHUTDOWN_MASK;
3079                         kfree_skb(skb, FREE_READ);
3080                         if (!sk->dead)
3081                                 sk->state_change(sk);
3082                         return(0);
3083                 }
3084 #if 0           
3085                 /* Discard the frame here - we've already proved its a duplicate */
3086                 
3087                 kfree_skb(skb, FREE_READ);
3088                 return(0);                              
3089 #endif          
3090         }
3091         /*
3092          *      Now we have to walk the chain, and figure out where this one
3093          *      goes into it.  This is set up so that the last packet we received
3094          *      will be the first one we look at, that way if everything comes
3095          *      in order, there will be no performance loss, and if they come
3096          *      out of order we will be able to fit things in nicely.
3097          */
3098 
3099         /* 
3100          *      This should start at the last one, and then go around forwards.
3101          */
3102 
3103         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3104         {
3105                 skb_queue_head(&sk->receive_queue,skb);
3106                 skb1= NULL;
3107         } 
3108         else
3109         {
3110                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3111                 {
3112                         if(sk->debug)
3113                         {
3114                                 printk("skb1=%p :", skb1);
3115                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3116                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3117                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3118                                                 sk->acked_seq);
3119                         }
3120                         
3121                         /*
3122                          *      Optimisation: Duplicate frame or extension of previous frame from
3123                          *      same sequence point (lost ack case).
3124                          *      The frame contains duplicate data or replaces a previous frame
3125                          *      discard the previous frame (safe as sk->inuse is set) and put
3126                          *      the new one in its place.
3127                          */
3128                          
3129                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3130                         {
3131                                 skb_append(skb1,skb);
3132                                 skb_unlink(skb1);
3133                                 kfree_skb(skb1,FREE_READ);
3134                                 dup_dumped=1;
3135                                 skb1=NULL;
3136                                 break;
3137                         }
3138                         
3139                         /*
3140                          *      Found where it fits
3141                          */
3142                          
3143                         if (after(th->seq+1, skb1->h.th->seq))
3144                         {
3145                                 skb_append(skb1,skb);
3146                                 break;
3147                         }
3148                         
3149                         /*
3150                          *      See if we've hit the start. If so insert.
3151                          */
3152                         if (skb1 == skb_peek(&sk->receive_queue))
3153                         {
3154                                 skb_queue_head(&sk->receive_queue, skb);
3155                                 break;
3156                         }
3157                 }
3158         }
3159 
3160         /*
3161          *      Figure out what the ack value for this frame is
3162          */
3163          
3164         th->ack_seq = th->seq + skb->len;
3165         if (th->syn) 
3166                 th->ack_seq++;
3167         if (th->fin)
3168                 th->ack_seq++;
3169 
3170         if (before(sk->acked_seq, sk->copied_seq)) 
3171         {
3172                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3173                 sk->acked_seq = sk->copied_seq;
3174         }
3175 
3176         /*
3177          *      Now figure out if we can ack anything.
3178          */
3179 
3180         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3181         {
3182                 if (before(th->seq, sk->acked_seq+1)) 
3183                 {
3184                         int newwindow;
3185 
3186                         if (after(th->ack_seq, sk->acked_seq)) 
3187                         {
3188                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3189                                 if (newwindow < 0)
3190                                         newwindow = 0;  
3191                                 sk->window = newwindow;
3192                                 sk->acked_seq = th->ack_seq;
3193                         }
3194                         skb->acked = 1;
3195 
3196                         /* 
3197                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3198                          */
3199 
3200                         if (skb->h.th->fin) 
3201                         {
3202                                 if (!sk->dead) 
3203                                         sk->state_change(sk);
3204                                 sk->shutdown |= RCV_SHUTDOWN;
3205                         }
3206           
3207                         for(skb2 = skb->next;
3208                             skb2 != (struct sk_buff *)&sk->receive_queue;
3209                             skb2 = skb2->next) 
3210                         {
3211                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3212                                 {
3213                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3214                                         {
3215                                                 newwindow = sk->window -
3216                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3217                                                 if (newwindow < 0)
3218                                                         newwindow = 0;  
3219                                                 sk->window = newwindow;
3220                                                 sk->acked_seq = skb2->h.th->ack_seq;
3221                                         }
3222                                         skb2->acked = 1;
3223                                         /*
3224                                          *      When we ack the fin, we turn on
3225                                          *      the RCV_SHUTDOWN flag.
3226                                          */
3227                                         if (skb2->h.th->fin) 
3228                                         {
3229                                                 sk->shutdown |= RCV_SHUTDOWN;
3230                                                 if (!sk->dead)
3231                                                         sk->state_change(sk);
3232                                         }
3233 
3234                                         /*
3235                                          *      Force an immediate ack.
3236                                          */
3237                                          
3238                                         sk->ack_backlog = sk->max_ack_backlog;
3239                                 }
3240                                 else
3241                                 {
3242                                         break;
3243                                 }
3244                         }
3245 
3246                         /*
3247                          *      This also takes care of updating the window.
3248                          *      This if statement needs to be simplified.
3249                          */
3250                         if (!sk->delay_acks ||
3251                             sk->ack_backlog >= sk->max_ack_backlog || 
3252                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3253         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3254                         }
3255                         else 
3256                         {
3257                                 sk->ack_backlog++;
3258                                 if(sk->debug)
3259                                         printk("Ack queued.\n");
3260                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3261                         }
3262                 }
3263         }
3264 
3265         /*
3266          *      If we've missed a packet, send an ack.
3267          *      Also start a timer to send another.
3268          */
3269          
3270         if (!skb->acked) 
3271         {
3272         
3273         /*
3274          *      This is important.  If we don't have much room left,
3275          *      we need to throw out a few packets so we have a good
3276          *      window.  Note that mtu is used, not mss, because mss is really
3277          *      for the send side.  He could be sending us stuff as large as mtu.
3278          */
3279                  
3280                 while (sk->prot->rspace(sk) < sk->mtu) 
3281                 {
3282                         skb1 = skb_peek(&sk->receive_queue);
3283                         if (skb1 == NULL) 
3284                         {
3285                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3286                                 break;
3287                         }
3288 
3289                         /*
3290                          *      Don't throw out something that has been acked. 
3291                          */
3292                  
3293                         if (skb1->acked) 
3294                         {
3295                                 break;
3296                         }
3297                 
3298                         skb_unlink(skb1);
3299                         kfree_skb(skb1, FREE_READ);
3300                 }
3301                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3302                 sk->ack_backlog++;
3303                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3304         }
3305         else
3306         {
3307                 /* We missed a packet.  Send an ack to try to resync things. */
3308                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3309         }
3310 
3311         /*
3312          *      Now tell the user we may have some data. 
3313          */
3314          
3315         if (!sk->dead) 
3316         {
3317                 if(sk->debug)
3318                         printk("Data wakeup.\n");
3319                 sk->data_ready(sk,0);
3320         } 
3321         return(0);
3322 }
3323 
3324 /*
      *      Record a new urgent pointer carried by header th on socket sk.
      *
      *      The 16 bit urg_ptr field is converted with ntohs(), reduced by
      *      one when non-zero, and added to th->seq to give a sequence
      *      number.  If that position has already been copied to the user,
      *      or an urgent pointer at or beyond it is already recorded, nothing
      *      happens.  Otherwise SIGURG is sent to sk->proc (a process when
      *      positive, the process group -sk->proc when negative),
      *      sk->urg_data becomes URG_NOTYET and sk->urg_seq the new position.
      */
3325 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3326 {
3327         unsigned long ptr = ntohs(th->urg_ptr);
3328 
3329         if (ptr)
3330                 ptr--;
3331         ptr += th->seq;
3332 
3333         /* ignore urgent data that we've already seen and read */
3334         if (after(sk->copied_seq+1, ptr))
3335                 return;
3336 
3337         /* do we already have a newer (or duplicate) urgent pointer? */
3338         if (sk->urg_data && !after(ptr, sk->urg_seq))
3339                 return;
3340 
3341         /* tell the world about our new urgent pointer */
3342         if (sk->proc != 0) {
3343                 if (sk->proc > 0) {
3344                         kill_proc(sk->proc, SIGURG, 1);
3345                 } else {
3346                         kill_pg(-sk->proc, SIGURG, 1);
3347                 }
3348         }
     /* Mark the urgent byte as announced but not yet received. */
3349         sk->urg_data = URG_NOTYET;
3350         sk->urg_seq = ptr;
3351 }
3352 /*
      *      Urgent data handling for one incoming segment.
      *
      *      If the URG flag is set the urgent pointer is (re)evaluated by
      *      tcp_check_urg().  Then, while the socket is still waiting for the
      *      urgent byte (sk->urg_data == URG_NOTYET), the byte is picked out
      *      of this segment if sk->urg_seq falls inside it, stored in
      *      sk->urg_data together with URG_VALID, and data_ready is called
      *      unless the socket is dead.
      *
      *      th  - TCP header of the segment; the payload is addressed
      *            relative to it.
      *      len - compared against an offset measured from the start of the
      *            TCP header, so it is expected to include the header
      *            (NOTE(review): confirm against the caller).
      *      saddr is not used.  Always returns 0.
      */
3353 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3354         unsigned long saddr, unsigned long len)
3355 {
3356         unsigned long ptr;
3357 
3358         /* check if we get a new urgent pointer */
3359         if (th->urg)
3360                 tcp_check_urg(sk,th);
3361 
3362         /* do we wait for any urgent data? */
3363         if (sk->urg_data != URG_NOTYET)
3364                 return 0;
3365 
3366         /* is the urgent pointer pointing into this packet? */
     /* ptr is the byte offset of the urgent byte from the start of the TCP
        header.  It is unsigned, so an urgent sequence number that lies
        before this segment wraps to a huge value and is rejected by the
        same ">= len" test. */
3367         ptr = sk->urg_seq - th->seq + th->doff*4;
3368         if (ptr >= len)
3369                 return 0;
3370 
3371         /* ok, got the correct packet, update info */
3372         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3373         if (!sk->dead)
3374                 sk->data_ready(sk,0);
3375         return 0;
3376 }
3377 
3378 
3379 /*
3380  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3381  *
3382  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3383  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3384  *  TIME-WAIT)
3385  *
3386  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3387  *  close and we go into CLOSING (and later onto TIME-WAIT)
3388  *
3389  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3390  *
      *  sk->fin_seq is set to the sequence number just past the FIN
      *  (th->seq + skb->len + SYN flag + FIN flag).  The state_change
      *  callback is invoked, for a socket that is not dead, before the
      *  state switch below runs.
      *
      *  Cases that leave the switch with "break" also increment
      *  sk->ack_backlog; the TIME_WAIT and default cases return directly
      *  and do not.  saddr and dev are not used.  Always returns 0.
3391  */
3392  
3393 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3394          unsigned long saddr, struct device *dev)
3395 {
3396         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3397 
3398         if (!sk->dead) 
3399         {
3400                 sk->state_change(sk);
3401         }
3402 
3403         switch(sk->state) 
3404         {
3405                 case TCP_SYN_RECV:
3406                 case TCP_SYN_SENT:
3407                 case TCP_ESTABLISHED:
3408                         /*
3409                          * move to CLOSE_WAIT, tcp_data() already handled
3410                          * sending the ack.
3411                          */
3412                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3413                         /*sk->fin_seq = th->seq+1;*/
3414                         tcp_set_state(sk,TCP_CLOSE_WAIT);
                         /* A segment carrying RST as well shuts down both
                            directions at once. */
3415                         if (th->rst)
3416                                 sk->shutdown = SHUTDOWN_MASK;
3417                         break;
3418 
3419                 case TCP_CLOSE_WAIT:
3420                 case TCP_CLOSING:
3421                         /*
3422                          * received a retransmission of the FIN, do
3423                          * nothing.
3424                          */
3425                         break;
3426                 case TCP_TIME_WAIT:
3427                         /*
3428                          * received a retransmission of the FIN,
3429                          * restart the TIME_WAIT timer.
3430                          */
3431                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3432                         return(0);
3433                 case TCP_FIN_WAIT1:
3434                         /*
3435                          * This case occurs when a simultaneous close
3436                          * happens, we must ack the received FIN and
3437                          * enter the CLOSING state.
3438                          *
3439                          * XXX timeout not set properly
3440                          */
3441 
3442                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3443                         /*sk->fin_seq = th->seq+1;*/
3444                         tcp_set_state(sk,TCP_CLOSING);
3445                         break;
3446                 case TCP_FIN_WAIT2:
3447                         /*
3448                          * received a FIN -- send ACK and enter TIME_WAIT
3449                          */
3450                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3451                         /*sk->fin_seq = th->seq+1;*/
3452                         tcp_set_state(sk,TCP_TIME_WAIT);
3453                         break;
3454                 case TCP_CLOSE:
3455                         /*
3456                          * already in CLOSE
3457                          */
3458                         break;
3459                 default:
                         /* Any state not listed above goes to LAST_ACK. */
3460                         tcp_set_state(sk,TCP_LAST_ACK);
3461         
3462                         /* Start the timers. */
3463                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3464                         return(0);
3465         }
     /* Count the ack owed for the FIN. */
3466         sk->ack_backlog++;
3467 
3468         return(0);
3469 }
3470 
3471 
3472 /*
      *      This will accept the next outstanding connection.
      *
      *      An skb is dequeued from the listening socket's receive_queue and
      *      the sock stored in its skb->sk field is returned; the skb itself
      *      is freed and sk->ack_backlog is decremented.
      *
      *      sk    - must be in TCP_LISTEN state.
      *      flags - if O_NONBLOCK is set the call does not sleep when the
      *              queue is empty.
      *
      *      Returns the new sock, or NULL with sk->err set to
      *        EINVAL       sk is not listening,
      *        EAGAIN       nothing is queued and O_NONBLOCK was given,
      *        ERESTARTSYS  a non-blocked signal is pending after the sleep.
      */
3473 static struct sock *
3474 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3475 {
3476         struct sock *newsk;
3477         struct sk_buff *skb;
3478   
3479   /*
3480    * We need to make sure that this socket is listening,
3481    * and that it has something pending.
3482    */
3483 
3484         if (sk->state != TCP_LISTEN) 
3485         {
3486                 sk->err = EINVAL;
3487                 return(NULL); 
3488         }
3489 
3490         /* Avoid the race. */
3491         cli();
3492         sk->inuse = 1;
3493 
3494         while((skb = skb_dequeue(&sk->receive_queue)) == NULL) 
3495         {
3496                 if (flags & O_NONBLOCK) 
3497                 {
3498                         sti();
3499                         release_sock(sk);
3500                         sk->err = EAGAIN;
3501                         return(NULL);
3502                 }
3503 
                 /* Give the socket up while sleeping and claim it again
                    (sk->inuse = 1) before the next dequeue attempt. */
3504                 release_sock(sk);
3505                 interruptible_sleep_on(sk->sleep);
3506                 if (current->signal & ~current->blocked) 
3507                 {
                         /* The socket was already released before the sleep,
                            so there is no release_sock() on this path. */
3508                         sti();
3509                         sk->err = ERESTARTSYS;
3510                         return(NULL);
3511                 }
3512                 sk->inuse = 1;
3513         }
3514         sti();
3515 
3516         /*
3517          *      Now all we need to do is return skb->sk. 
3518          */
3519 
3520         newsk = skb->sk;
3521 
3522         kfree_skb(skb, FREE_READ);
3523         sk->ack_backlog--;
3524         release_sock(sk);
3525         return(newsk);
3526 }
3527 
3528 
3529 /*
3530  *      This will initiate an outgoing connection. 
      *
      *      A SYN segment carrying one 4 byte option (kind 2, length 4,
      *      value sk->mtu) is built and handed to queue_xmit, the socket
      *      is moved to TCP_SYN_SENT and the write timer is armed with
      *      TCP_TIMEOUT_INIT so the SYN is repeated until an answer arrives.
      *
      *      sk       - socket to connect; must be in TCP_CLOSE state.
      *      usin     - destination address and port.  A destination of
      *                 INADDR_ANY is replaced in place by ip_my_addr().
      *      addr_len - length of *usin; must be at least 8.
      *
      *      Returns 0 once the SYN has been queued, otherwise
      *        -EISCONN       sk is not in TCP_CLOSE state,
      *        -EINVAL        addr_len is too small,
      *        -EAFNOSUPPORT  sin_family is non-zero and not AF_INET,
      *        -ENETUNREACH   the destination is a broadcast address, or
      *                       build_header failed,
      *        -EBUSY         the destination is this socket's own address
      *                       and port,
      *        -ENOMEM        no buffer could be allocated for the SYN.
3531  */
3532  
3533 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3534 {
3535         struct sk_buff *buff;
3536         struct device *dev=NULL;
3537         unsigned char *ptr;
3538         int tmp;
3539         struct tcphdr *t1;
3540         struct rtable *rt;
3541 
3542         if (sk->state != TCP_CLOSE) 
3543                 return(-EISCONN);
3544 
3545         if (addr_len < 8) 
3546                 return(-EINVAL);
3547 
3548         if (usin->sin_family && usin->sin_family != AF_INET) 
3549                 return(-EAFNOSUPPORT);
3550 
3551         /*
3552          *      connect() to INADDR_ANY means loopback (BSD'ism).
3553          */
3554         
3555         if(usin->sin_addr.s_addr==INADDR_ANY)
3556                 usin->sin_addr.s_addr=ip_my_addr();
3557                   
3558         /*
3559          *      Don't want a TCP connection going to a broadcast address 
3560          */
3561 
3562         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3563         { 
3564                 return -ENETUNREACH;
3565         }
3566   
3567         /*
3568          *      Connect back to the same socket: Blows up so disallow it 
3569          */
3570 
3571         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3572                 return -EBUSY;
3573 
     /* Claim the socket, set up the destination and the initial sequence
        state, then release it again before the allocation below
        (wmalloc is called with GFP_KERNEL). */
3574         sk->inuse = 1;
3575         sk->daddr = usin->sin_addr.s_addr;
3576         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3577         sk->window_seq = sk->write_seq;
3578         sk->rcv_ack_seq = sk->write_seq -1;
3579         sk->err = 0;
3580         sk->dummy_th.dest = usin->sin_port;
3581         release_sock(sk);
3582 
3583         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3584         if (buff == NULL) 
3585         {
3586                 return(-ENOMEM);
3587         }
3588         sk->inuse = 1;
     /* 24 matches doff = 6 below (6 * 4 bytes): the TCP header plus the
        4 byte option. */
3589         buff->len = 24;
3590         buff->sk = sk;
3591         buff->free = 1;
3592         buff->localroute = sk->localroute;
3593         
3594         t1 = (struct tcphdr *) buff->data;
3595 
3596         /*
3597          *      Put in the IP header and routing stuff. 
3598          */
3599          
     /* The route is only consulted further down, for the window clamp and
        the mss; it may be NULL. */
3600         rt=ip_rt_route(sk->daddr, NULL, NULL);
3601         
3602 
3603         /*
3604          *      We need to build the routing stuff from the things saved in skb. 
3605          */
3606 
3607         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3608                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3609         if (tmp < 0) 
3610         {
3611                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3612                 release_sock(sk);
3613                 return(-ENETUNREACH);
3614         }
3615 
     /* tmp is the number of header bytes build_header wrote; the TCP header
        starts right behind them. */
3616         buff->len += tmp;
3617         t1 = (struct tcphdr *)((char *)t1 +tmp);
3618 
3619         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
     /* NOTE(review): ntohl() is applied to a host order value here; htonl()
        would be the conventional spelling for this direction - presumably
        the two perform the same byte swap on this platform. */
3620         t1->seq = ntohl(sk->write_seq++);
3621         sk->sent_seq = sk->write_seq;
3622         buff->h.seq = sk->write_seq;
3623         t1->ack = 0;
     /* NOTE(review): the window field is stored without a byte order
        conversion - confirm the intended on-the-wire value. */
3624         t1->window = 2;
3625         t1->res1=0;
3626         t1->res2=0;
3627         t1->rst = 0;
3628         t1->urg = 0;
3629         t1->psh = 0;
3630         t1->syn = 1;
3631         t1->urg_ptr = 0;
3632         t1->doff = 6;
3633         /* use 512 or whatever user asked for */
3634         
3635         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3636                 sk->window_clamp=rt->rt_window;
3637         else
3638                 sk->window_clamp=0;
3639 
     /* sk->mtu selection, in order of preference: the user supplied mss,
        the route's mss, otherwise 576 - HEADER_SIZE when source and
        destination differ under the mask, else MAX_WINDOW. */
3640         if (sk->user_mss)
3641                 sk->mtu = sk->user_mss;
3642         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3643                 sk->mtu = rt->rt_mss;
3644         else 
3645         {
3646 #ifdef CONFIG_INET_SNARL
3647                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3648 #else
3649                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3650 #endif
3651                         sk->mtu = 576 - HEADER_SIZE;
3652                 else
3653                         sk->mtu = MAX_WINDOW;
3654         }
3655         /*
3656          *      but not bigger than device MTU 
3657          */
3658 
3659         if(sk->mtu <32)
3660                 sk->mtu = 32;   /* Sanity limit */
3661                 
3662         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3663         
3664         /*
3665          *      Put in the TCP options to say MTU. 
          *      (option kind 2, length 4, value sk->mtu with the high byte
          *      first)
3666          */
3667 
3668         ptr = (unsigned char *)(t1+1);
3669         ptr[0] = 2;
3670         ptr[1] = 4;
3671         ptr[2] = (sk->mtu) >> 8;
3672         ptr[3] = (sk->mtu) & 0xff;
3673         tcp_send_check(t1, sk->saddr, sk->daddr,
3674                   sizeof(struct tcphdr) + 4, sk);
3675 
3676         /*
3677          *      This must go first otherwise a really quick response will get reset. 
3678          */
3679 
3680         tcp_set_state(sk,TCP_SYN_SENT);
3681 /*      sk->rtt = TCP_CONNECT_TIME;*/
3682         sk->rto = TCP_TIMEOUT_INIT;
3683         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3684         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3685 
3686         sk->prot->queue_xmit(sk, dev, buff, 0);  
3687         tcp_statistics.TcpActiveOpens++;
3688         tcp_statistics.TcpOutSegs++;
3689   
3690         release_sock(sk);
3691         return(0);
3692 }
3693 
3694 
3695 /*
      *      This function checks to see if the tcp header is actually acceptable.
      *
      *      A segment is accepted when it carries something not yet acked
      *      and does not start beyond acked_seq + window.  A segment with
      *      data or a FIN is rejected outright while sk->window is zero.
      *
      *      len   - segment length including the TCP header (4*th->doff is
      *              subtracted to get the payload size).
      *      saddr - passed to tcp_send_ack() when an ack is sent in reply.
      *      dev   - passed to tcp_reset() when a reset is sent in reply.
      *      opt is not used.
      *
      *      Returns 1 if the segment is acceptable.  For a rejected segment:
      *        - RST set: returns 0 and sends nothing;
      *        - socket in SYN_SENT or SYN_RECV: a reset is sent and 1 is
      *          returned;
      *        - otherwise: an ack is sent and 0 is returned.
      */
3696 static int
3697 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3698              struct options *opt, unsigned long saddr, struct device *dev)
3699 {
3700         unsigned long next_seq;
3701 
     /* next_seq first holds the amount of sequence space the segment uses
        (payload bytes, plus one for a FIN); th->seq is added further down. */
3702         next_seq = len - 4*th->doff;
3703         if (th->fin)
3704                 next_seq++;
3705         /* if we have a zero window, we can't have any data in the packet.. */
3706         if (next_seq && !sk->window)
3707                 goto ignore_it;
3708         next_seq += th->seq;
3709 
3710         /*
3711          * This isn't quite right.  sk->acked_seq could be more recent
3712          * than sk->window.  This is however close enough.  We will accept
3713          * slightly more packets than we should, but it should not cause
3714          * problems unless someone is trying to forge packets.
3715          */
3716 
3717         /* have we already seen all of this packet? */
3718         if (!after(next_seq+1, sk->acked_seq))
3719                 goto ignore_it;
3720         /* or does it start beyond the window? */
3721         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3722                 goto ignore_it;
3723 
3724         /* ok, at least part of this packet would seem interesting.. */
3725         return 1;
3726 
3727 ignore_it:
     /* Never answer a reset. */
3728         if (th->rst)
3729                 return 0;
3730 
3731         /*
3732          *      Send a reset if we get something not ours and we are
3733          *      unsynchronized. Note: We don't do anything to our end. We
3734          *      are just killing the bogus remote connection then we will
3735          *      connect again and it will work (with luck).
3736          */
3737          
3738         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3739                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3740                 return 1;
3741         }
3742 
3743         /* Try to resync things. */
3744         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3745         return 0;
3746 }
3747 
3748 
3749 #ifdef TCP_FASTPATH
3750 /*
3751  *      Is the end of the queue clear of fragments as yet unmerged into the data stream?
3752  *      Yes if
3753  *      a) The queue is empty
3754  *      b) The last frame on the queue has the acked flag set
      *
      *      sk - socket whose receive_queue is examined.
      *
      *      Returns 1 when the end of the queue is clean and 0 when it is not.
3755  */
3756 
3757 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3758 {
3759         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3760         /* Return the test itself so the "not clean" case yields a defined 0 instead of running off the end of the function. */
3761         return (skb==NULL || sk->receive_queue.prev->acked);
3762 }
3763 
3764 #endif
3765 
3766 int
3767 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3768         unsigned long daddr, unsigned short len,
3769         unsigned long saddr, int redo, struct inet_protocol * protocol)
3770 {
3771         struct tcphdr *th;
3772         struct sock *sk;
3773 
3774         if (!skb) 
3775         {
3776                 return(0);
3777         }
3778 
3779         if (!dev) 
3780         {
3781                 return(0);
3782         }
3783   
3784         tcp_statistics.TcpInSegs++;
3785   
3786         if(skb->pkt_type!=PACKET_HOST)
3787         {
3788                 kfree_skb(skb,FREE_READ);
3789                 return(0);
3790         }
3791   
3792         th = skb->h.th;
3793 
3794         /*
3795          *      Find the socket.
3796          */
3797 
3798         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3799 
3800         /*
3801          *      If this socket has got a reset its to all intents and purposes 
3802          *      really dead 
3803          */
3804          
3805         if (sk!=NULL && sk->zapped)
3806                 sk=NULL;
3807 
3808         if (!redo) 
3809         {
3810                 if (tcp_check(th, len, saddr, daddr )) 
3811                 {
3812                         skb->sk = NULL;
3813                         kfree_skb(skb,FREE_READ);
3814                         /*
3815                          * We don't release the socket because it was
3816                          * never marked in use.
3817                          */
3818                         return(0);
3819                 }
3820                 th->seq = ntohl(th->seq);
3821 
3822                 /* See if we know about the socket. */
3823                 if (sk == NULL) 
3824                 {
3825                         if (!th->rst)
3826                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3827                         skb->sk = NULL;
3828                         kfree_skb(skb, FREE_READ);
3829                         return(0);
3830                 }
3831 
3832                 skb->len = len;
3833                 skb->sk = sk;
3834                 skb->acked = 0;
3835                 skb->used = 0;
3836                 skb->free = 0;
3837                 skb->saddr = daddr;
3838                 skb->daddr = saddr;
3839         
3840                 /* We may need to add it to the backlog here. */
3841                 cli();
3842                 if (sk->inuse) 
3843                 {
3844                         skb_queue_head(&sk->back_log, skb);
3845                         sti();
3846                         return(0);
3847                 }
3848                 sk->inuse = 1;
3849                 sti();
3850         }
3851         else
3852         {
3853                 if (!sk) 
3854                 {
3855                         return(0);
3856                 }
3857         }
3858 
3859 
3860         if (!sk->prot) 
3861         {
3862                 return(0);
3863         }
3864 
3865 
3866         /*
3867          *      Charge the memory to the socket. 
3868          */
3869          
3870         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3871         {
3872                 skb->sk = NULL;
3873                 kfree_skb(skb, FREE_READ);
3874                 release_sock(sk);
3875                 return(0);
3876         }
3877 
3878         sk->rmem_alloc += skb->mem_len;
3879 
3880 #ifdef TCP_FASTPATH
3881 /*
3882  *      Incoming data stream fastpath. 
3883  *
3884  *      We try to optimise two things.
3885  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3886  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3887  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3888  *
3889  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3890  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3891  *      speed although further optimizing here is possible.
3892  */
3893  
3894         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3895         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3896         {       
3897                 /* Packets in order. Fits window */
3898                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3899                 {
3900                         /* Ack is harder */
3901                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3902                         {
3903                                 kfree_skb(skb, FREE_READ);
3904                                 release_sock(sk);
3905                                 return 0;
3906                         }
3907                         /*
3908                          *      Set up variables
3909                          */
3910                         skb->len -= (th->doff *4);
3911                         sk->bytes_rcv += skb->len;
3912                         tcp_rx_hit2++;
3913                         if(skb->len)
3914                         {
3915                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3916                                 if(sk->window >= skb->len)
3917                                         sk->window-=skb->len;                   /* We know its effect on the window */
3918                                 else
3919                                         sk->window=0;
3920                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3921                                 skb->acked=1;                           /* Guaranteed true */
3922                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3923                                         sk->bytes_rcv > sk->max_unacked)
3924                                 {
3925                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3926                                 }
3927                                 else
3928                                 {
3929                                         sk->ack_backlog++;
3930                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3931                                 }
3932                                 if(!sk->dead)
3933                                         sk->data_ready(sk,0);
3934                                 release_sock(sk);
3935                                 return 0;
3936                         }
3937                 }
3938                 /*
3939                  *      More generic case of arriving data stream in ESTABLISHED
3940                  */
3941                 tcp_rx_hit1++;
3942                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3943                 {
3944                         kfree_skb(skb, FREE_READ);
3945                         release_sock(sk);
3946                         return 0;
3947                 }
3948                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3949                 {
3950                         kfree_skb(skb, FREE_READ);
3951                         release_sock(sk);
3952                         return 0;
3953                 }
3954                 if(tcp_data(skb, sk, saddr, len))
3955                         kfree_skb(skb, FREE_READ);
3956                 release_sock(sk);
3957                 return 0;
3958         }
3959         tcp_rx_miss++;
3960 #endif  
3961 
3962         /*
3963          *      Now deal with all cases.
3964          */
3965          
3966         switch(sk->state) 
3967         {
3968         
3969                 /*
3970                  * This should close the system down if it's waiting
3971                  * for an ack that is never going to be sent.
3972                  */
3973                 case TCP_LAST_ACK:
3974                         if (th->rst) 
3975                         {
3976                                 sk->zapped=1;
3977                                 sk->err = ECONNRESET;
3978                                 tcp_set_state(sk,TCP_CLOSE);
3979                                 sk->shutdown = SHUTDOWN_MASK;
3980                                 if (!sk->dead) 
3981                                 {
3982                                         sk->state_change(sk);
3983                                 }
3984                                 kfree_skb(skb, FREE_READ);
3985                                 release_sock(sk);
3986                                 return(0);
3987                         }
3988 
3989                 case TCP_ESTABLISHED:
3990                 case TCP_CLOSE_WAIT:
3991                 case TCP_CLOSING:
3992                 case TCP_FIN_WAIT1:
3993                 case TCP_FIN_WAIT2:
3994                 case TCP_TIME_WAIT:
3995                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3996                         {
3997                                 kfree_skb(skb, FREE_READ);
3998                                 release_sock(sk);
3999                                 return(0);
4000                         }
4001 
4002                         if (th->rst) 
4003                         {
4004                                 tcp_statistics.TcpEstabResets++;
4005                                 sk->zapped=1;
4006                                 /* This means the thing should really be closed. */
4007                                 sk->err = ECONNRESET;
4008                                 if (sk->state == TCP_CLOSE_WAIT) 
4009                                 {
4010                                         sk->err = EPIPE;
4011                                 }
4012         
4013                                 /*
4014                                  * A reset with a fin just means that
4015                                  * the data was not all read.
4016                                  */
4017                                 tcp_set_state(sk,TCP_CLOSE);
4018                                 sk->shutdown = SHUTDOWN_MASK;
4019                                 if (!sk->dead) 
4020                                 {
4021                                         sk->state_change(sk);
4022                                 }
4023                                 kfree_skb(skb, FREE_READ);
4024                                 release_sock(sk);
4025                                 return(0);
4026                         }
4027                         if (th->syn) 
4028                         {
4029                                 tcp_statistics.TcpEstabResets++;
4030                                 sk->err = ECONNRESET;
4031                                 tcp_set_state(sk,TCP_CLOSE);
4032                                 sk->shutdown = SHUTDOWN_MASK;
4033                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4034                                 if (!sk->dead) {
4035                                         sk->state_change(sk);
4036                                 }
4037                                 kfree_skb(skb, FREE_READ);
4038                                 release_sock(sk);
4039                                 return(0);
4040                         }
4041         
4042                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4043                                 kfree_skb(skb, FREE_READ);
4044                                 release_sock(sk);
4045                                 return(0);
4046                         }
4047         
4048                         if (tcp_urg(sk, th, saddr, len)) {
4049                                 kfree_skb(skb, FREE_READ);
4050                                 release_sock(sk);
4051                                 return(0);
4052                         }
4053 
4054         
4055                         if (tcp_data(skb, sk, saddr, len)) {
4056                                 kfree_skb(skb, FREE_READ);
4057                                 release_sock(sk);
4058                                 return(0);
4059                         }       
4060 
4061                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4062                                 kfree_skb(skb, FREE_READ);
4063                                 release_sock(sk);
4064                                 return(0);
4065                         }
4066         
4067                         release_sock(sk);
4068                         return(0);
4069                 
4070                 case TCP_CLOSE:
4071                         if (sk->dead || sk->daddr) {
4072                                 kfree_skb(skb, FREE_READ);
4073                                         release_sock(sk);
4074                                 return(0);
4075                         }
4076         
4077                         if (!th->rst) {
4078                                 if (!th->ack)
4079                                         th->ack_seq = 0;
4080                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4081                         }
4082                         kfree_skb(skb, FREE_READ);
4083                         release_sock(sk);
4084                                 return(0);
4085         
4086                 case TCP_LISTEN:
4087                         if (th->rst) {
4088                                 kfree_skb(skb, FREE_READ);
4089                                 release_sock(sk);
4090                                 return(0);
4091                         }
4092                         if (th->ack) {
4093                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4094                                 kfree_skb(skb, FREE_READ);
4095                                 release_sock(sk);
4096                                 return(0);
4097                         }
4098         
4099                         if (th->syn) 
4100                         {
4101                                 /*
4102                                  * Now we just put the whole thing including
4103                                  * the header and saddr, and protocol pointer
4104                                  * into the buffer.  We can't respond until the
4105                                  * user tells us to accept the connection.
4106                                  */
4107                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4108                                 release_sock(sk);
4109                                 return(0);
4110                         }
4111 
4112                         kfree_skb(skb, FREE_READ);
4113                         release_sock(sk);
4114                         return(0);
4115 
4116                 case TCP_SYN_RECV:
4117                         if (th->syn) {
4118                                 /* Probably a retransmitted syn */
4119                                 kfree_skb(skb, FREE_READ);
4120                                 release_sock(sk);
4121                                 return(0);
4122                         }
4123         
4124         
4125                 default:
4126                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4127                         {
4128                                 kfree_skb(skb, FREE_READ);
4129                                 release_sock(sk);
4130                                 return(0);
4131                         }
4132         
4133                 case TCP_SYN_SENT:
4134                         if (th->rst) 
4135                         {
4136                                 tcp_statistics.TcpAttemptFails++;
4137                                 sk->err = ECONNREFUSED;
4138                                 tcp_set_state(sk,TCP_CLOSE);
4139                                 sk->shutdown = SHUTDOWN_MASK;
4140                                 sk->zapped = 1;
4141                                 if (!sk->dead) 
4142                                 {
4143                                         sk->state_change(sk);
4144                                 }
4145                                 kfree_skb(skb, FREE_READ);
4146                                 release_sock(sk);
4147                                 return(0);
4148                         }
4149                         if (!th->ack) 
4150                         {
4151                                 if (th->syn) 
4152                                 {
4153                                         tcp_set_state(sk,TCP_SYN_RECV);
4154                                 }
4155                                 kfree_skb(skb, FREE_READ);
4156                                 release_sock(sk);
4157                                 return(0);
4158                         }
4159         
4160                         switch(sk->state) 
4161                         {
4162                                 case TCP_SYN_SENT:
4163                                         if (!tcp_ack(sk, th, saddr, len)) 
4164                                         {
4165                                                 tcp_statistics.TcpAttemptFails++;
4166                                                 tcp_reset(daddr, saddr, th,
4167                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4168                                                 kfree_skb(skb, FREE_READ);
4169                                                         release_sock(sk);
4170                                                 return(0);
4171                                         }
4172         
4173                                         /*
4174                                          * If the syn bit is also set, switch to
4175                                          * tcp_syn_recv, and then to established.
4176                                          */
4177                                         if (!th->syn) 
4178                                         {
4179                                                 kfree_skb(skb, FREE_READ);
4180                                                 release_sock(sk);
4181                                                 return(0);
4182                                         }
4183         
4184                                         /* Ack the syn and fall through. */
4185                                         sk->acked_seq = th->seq+1;
4186                                         sk->fin_seq = th->seq;
4187                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4188                                                 sk, th, sk->daddr);
4189                 
4190                                 case TCP_SYN_RECV:
4191                                         if (!tcp_ack(sk, th, saddr, len)) 
4192                                         {
4193                                                 tcp_statistics.TcpAttemptFails++;
4194                                                 tcp_reset(daddr, saddr, th,
4195                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4196                                                 kfree_skb(skb, FREE_READ);
4197                                                 release_sock(sk);
4198                                                 return(0);
4199                                         }
4200         
4201                                         tcp_set_state(sk,TCP_ESTABLISHED);
4202         
4203                                         /*
4204                                          *      Now we need to finish filling out
4205                                          *      some of the tcp header.
4206                                          * 
4207                                          *      We need to check for mtu info. 
4208                                          */
4209                                         tcp_options(sk, th);
4210                                         sk->dummy_th.dest = th->source;
4211                                         sk->copied_seq = sk->acked_seq-1;
4212                                         if (!sk->dead) 
4213                                         {
4214                                                 sk->state_change(sk);
4215                                         }
4216         
4217                                         /*
4218                                          * We've already processed his first
4219                                          * ack.  In just about all cases that
4220                                          * will have set max_window.  This is
4221                                          * to protect us against the possibility
4222                                          * that the initial window he sent was 0.
4223                                          * This must occur after tcp_options, which
4224                                          * sets sk->mtu.
4225                                          */
4226                                         if (sk->max_window == 0) 
4227                                         {
4228                                                 sk->max_window = 32;
4229                                                 sk->mss = min(sk->max_window, sk->mtu);
4230                                         }
4231 
4232                                         /*
4233                                          * Now process the rest like we were
4234                                          * already in the established state.
4235                                          */
4236                                         if (th->urg) 
4237                                         {
4238                                                 if (tcp_urg(sk, th, saddr, len)) 
4239                                                 { 
4240                                                         kfree_skb(skb, FREE_READ);
4241                                                         release_sock(sk);
4242                                                         return(0);
4243                                                 }
4244                                         }
4245                                         if (tcp_data(skb, sk, saddr, len))
4246                                                 kfree_skb(skb, FREE_READ);
4247 
4248                                         if (th->fin)
4249                                                 tcp_fin(skb, sk, th, saddr, dev);
4250                                         release_sock(sk);
4251                                         return(0);
4252                         }
4253         
4254                         if (th->urg) 
4255                         {
4256                                 if (tcp_urg(sk, th, saddr, len)) 
4257                                 {
4258                                         kfree_skb(skb, FREE_READ);
4259                                         release_sock(sk);
4260                                         return(0);
4261                                 }
4262                         }
4263                         if (tcp_data(skb, sk, saddr, len)) 
4264                         {
4265                                 kfree_skb(skb, FREE_READ);
4266                                 release_sock(sk);
4267                                 return(0);
4268                         }
4269         
4270                         if (!th->fin) 
4271                         {
4272                                 release_sock(sk);
4273                                 return(0);
4274                         }
4275                         tcp_fin(skb, sk, th, saddr, dev);
4276                         release_sock(sk);
4277                         return(0);
4278         }
4279 }
4280 
4281 
4282 /*
4283  * This routine sends a packet with an out of date sequence
4284  * number. It assumes the other end will try to ack it.
4285  */
4286 
4287 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4288 {
4289         struct sk_buff *buff;
4290         struct tcphdr *t1;
4291         struct device *dev=NULL;
4292         int tmp;
4293 
4294         if (sk->zapped)
4295                 return; /* After a valid reset we can send no more */
4296 
4297         /*
4298          * Write data can still be transmitted/retransmitted in the
4299          * following states.  If any other state is encountered, return.
4300          */
4301 
4302         if (sk->state != TCP_ESTABLISHED && 
4303             sk->state != TCP_CLOSE_WAIT &&
4304             sk->state != TCP_FIN_WAIT1 && 
4305             sk->state != TCP_LAST_ACK &&
4306             sk->state != TCP_CLOSING
4307         ) {
4308                 return;
4309         }
4310 
4311         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4312         if (buff == NULL) 
4313                 return;
4314 
4315         buff->len = sizeof(struct tcphdr);
4316         buff->free = 1;
4317         buff->sk = sk;
4318         buff->localroute = sk->localroute;
4319 
4320         t1 = (struct tcphdr *) buff->data;
4321 
4322         /* Put in the IP header and routing stuff. */
4323         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4324                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4325         if (tmp < 0) 
4326         {
4327                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4328                 return;
4329         }
4330 
4331         buff->len += tmp;
4332         t1 = (struct tcphdr *)((char *)t1 +tmp);
4333 
4334         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4335 
4336         /*
4337          * Use a previous sequence.
4338          * This should cause the other end to send an ack.
4339          */
4340         t1->seq = htonl(sk->sent_seq-1);
4341         t1->ack = 1; 
4342         t1->res1= 0;
4343         t1->res2= 0;
4344         t1->rst = 0;
4345         t1->urg = 0;
4346         t1->psh = 0;
4347         t1->fin = 0;
4348         t1->syn = 0;
4349         t1->ack_seq = ntohl(sk->acked_seq);
4350         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
4351         t1->doff = sizeof(*t1)/4;
4352         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4353 
4354          /*     Send it and free it.
4355           *     This will prevent the timer from automatically being restarted.
4356           */
4357         sk->prot->queue_xmit(sk, dev, buff, 1);
4358         tcp_statistics.TcpOutSegs++;
4359 }
4360 
4361 void
4362 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4363 {
4364         if (sk->zapped)
4365                 return;         /* After a valid reset we can send no more */
4366 
4367         tcp_write_wakeup(sk);
4368 
4369         sk->backoff++;
4370         sk->rto = min(sk->rto << 1, 120*HZ);
4371         reset_timer (sk, TIME_PROBE0, sk->rto);
4372         sk->retransmits++;
4373         sk->prot->retransmits ++;
4374 }
4375 
4376 /*
4377  *      Socket option code for TCP. 
4378  */
4379   
4380 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4381 {
4382         int val,err;
4383 
4384         if(level!=SOL_TCP)
4385                 return ip_setsockopt(sk,level,optname,optval,optlen);
4386 
4387         if (optval == NULL) 
4388                 return(-EINVAL);
4389 
4390         err=verify_area(VERIFY_READ, optval, sizeof(int));
4391         if(err)
4392                 return err;
4393         
4394         val = get_fs_long((unsigned long *)optval);
4395 
4396         switch(optname)
4397         {
4398                 case TCP_MAXSEG:
4399 /*                      if(val<200||val>2048 || val>sk->mtu) */
4400 /*
4401  * values greater than interface MTU won't take effect.  however at
4402  * the point when this call is done we typically don't yet know
4403  * which interface is going to be used
4404  */
4405                         if(val<1||val>MAX_WINDOW)
4406                                 return -EINVAL;
4407                         sk->user_mss=val;
4408                         return 0;
4409                 case TCP_NODELAY:
4410                         sk->nonagle=(val==0)?0:1;
4411                         return 0;
4412                 default:
4413                         return(-ENOPROTOOPT);
4414         }
4415 }
4416 
4417 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4418 {
4419         int val,err;
4420 
4421         if(level!=SOL_TCP)
4422                 return ip_getsockopt(sk,level,optname,optval,optlen);
4423                         
4424         switch(optname)
4425         {
4426                 case TCP_MAXSEG:
4427                         val=sk->user_mss;
4428                         break;
4429                 case TCP_NODELAY:
4430                         val=sk->nonagle;        /* Until Johannes stuff is in */
4431                         break;
4432                 default:
4433                         return(-ENOPROTOOPT);
4434         }
4435         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4436         if(err)
4437                 return err;
4438         put_fs_long(sizeof(int),(unsigned long *) optlen);
4439 
4440         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4441         if(err)
4442                 return err;
4443         put_fs_long(val,(unsigned long *)optval);
4444 
4445         return(0);
4446 }       
4447 
4448 
4449 struct proto tcp_prot = {
4450         sock_wmalloc,
4451         sock_rmalloc,
4452         sock_wfree,
4453         sock_rfree,
4454         sock_rspace,
4455         sock_wspace,
4456         tcp_close,
4457         tcp_read,
4458         tcp_write,
4459         tcp_sendto,
4460         tcp_recvfrom,
4461         ip_build_header,
4462         tcp_connect,
4463         tcp_accept,
4464         ip_queue_xmit,
4465         tcp_retransmit,
4466         tcp_write_wakeup,
4467         tcp_read_wakeup,
4468         tcp_rcv,
4469         tcp_select,
4470         tcp_ioctl,
4471         NULL,
4472         tcp_shutdown,
4473         tcp_setsockopt,
4474         tcp_getsockopt,
4475         128,
4476         0,
4477         {NULL,},
4478         "TCP"
4479 };

/* [previous][next][first][last][top][bottom][index][help] */