Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[arm] Add optimised TCP/IP checksumming for 64-bit ARM
Signed-off-by: Michael Brown <mcb30@ipxe.org>
  • Loading branch information
mcb30 committed May 11, 2016
1 parent 95716ec commit 47931a4
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 0 deletions.
File renamed without changes.
175 changes: 175 additions & 0 deletions src/arch/arm64/core/arm64_tcpip.c
@@ -0,0 +1,175 @@
/*
* Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* You can also choose to distribute this program under the terms of
* the Unmodified Binary Distribution Licence (as given in the file
* COPYING.UBDL), provided that you have satisfied its requirements.
*/

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

/** @file
*
* TCP/IP checksum
*
*/

#include <strings.h>
#include <ipxe/tcpip.h>

/** Alignment used by main checksumming loop */
#define TCPIP_CHKSUM_ALIGN 16

/** Number of steps in each iteration of the unrolled main checksumming loop */
#define TCPIP_CHKSUM_UNROLL 4

/**
* Calculate continued TCP/IP checkum
*
* @v sum Checksum of already-summed data, in network byte order
* @v data Data buffer
* @v len Length of data buffer
* @ret sum Updated checksum, in network byte order
*/
uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
size_t len ) {
intptr_t start;
intptr_t end;
intptr_t mid;
unsigned int pre;
unsigned int post;
unsigned int first;
uint64_t discard_low;
uint64_t discard_high;

/* Avoid potentially undefined shift operation */
if ( len == 0 )
return sum;

/* Find maximally-aligned midpoint. For short blocks of data,
* this may be aligned to fewer than 16 bytes.
*/
start = ( ( intptr_t ) data );
end = ( start + len );
mid = ( end &
~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );

/* Calculate pre- and post-alignment lengths */
pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );

/* Calculate number of steps in first iteration of unrolled loop */
first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
( TCPIP_CHKSUM_UNROLL - 1 ) );

/* Calculate checksum */
__asm__ ( /* Invert sum */
"eor %w0, %w0, #0xffff\n\t"
/* Clear carry flag */
"cmn xzr, xzr\n\t"
/* Byteswap and sum pre-alignment byte, if applicable */
"tbz %w4, #0, 1f\n\t"
"ldrb %w2, [%1], #1\n\t"
"rev16 %w0, %w0\n\t"
"rev16 %w2, %w2\n\t"
"adcs %0, %0, %2\n\t"
"\n1:\n\t"
/* Sum pre-alignment halfword, if applicable */
"tbz %w4, #1, 1f\n\t"
"ldrh %w2, [%1], #2\n\t"
"adcs %0, %0, %2\n\t"
"\n1:\n\t"
/* Sum pre-alignment word, if applicable */
"tbz %w4, #2, 1f\n\t"
"ldr %w2, [%1], #4\n\t"
"adcs %0, %0, %2\n\t"
"\n1:\n\t"
/* Sum pre-alignment doubleword, if applicable */
"tbz %w4, #3, 1f\n\t"
"ldr %2, [%1], #8\n\t"
"adcs %0, %0, %2\n\t"
"\n1:\n\t"
/* Jump into unrolled (x4) main loop */
"adr %2, 2f\n\t"
"sub %2, %2, %5, lsl #3\n\t"
"sub %2, %2, %5, lsl #2\n\t"
"br %2\n\t"
"\n1:\n\t"
"ldp %2, %3, [%1], #16\n\t"
"adcs %0, %0, %2\n\t"
"adcs %0, %0, %3\n\t"
"ldp %2, %3, [%1], #16\n\t"
"adcs %0, %0, %2\n\t"
"adcs %0, %0, %3\n\t"
"ldp %2, %3, [%1], #16\n\t"
"adcs %0, %0, %2\n\t"
"adcs %0, %0, %3\n\t"
"ldp %2, %3, [%1], #16\n\t"
"adcs %0, %0, %2\n\t"
"adcs %0, %0, %3\n\t"
"\n2:\n\t"
"sub %2, %1, %6\n\t"
"cbnz %2, 1b\n\t"
/* Sum post-alignment doubleword, if applicable */
"tbz %w7, #3, 1f\n\t"
"ldr %2, [%1], #8\n\t"
"adcs %0, %0, %2\n\t"
"\n1:\n\t"
/* Sum post-alignment word, if applicable */
"tbz %w7, #2, 1f\n\t"
"ldr %w2, [%1], #4\n\t"
"adcs %0, %0, %2\n\t"
"\n1:\n\t"
/* Sum post-alignment halfword, if applicable */
"tbz %w7, #1, 1f\n\t"
"ldrh %w2, [%1], #2\n\t"
"adcs %0, %0, %2\n\t"
"\n1:\n\t"
/* Sum post-alignment byte, if applicable */
"tbz %w7, #0, 1f\n\t"
"ldrb %w2, [%1], #1\n\t"
"adcs %0, %0, %2\n\t"
"\n1:\n\t"
/* Fold down to a uint32_t plus carry flag */
"lsr %2, %0, #32\n\t"
"adcs %w0, %w0, %w2\n\t"
/* Fold down to a uint16_t plus carry in bit 16 */
"ubfm %2, %0, #0, #15\n\t"
"ubfm %3, %0, #16, #31\n\t"
"adc %w0, %w2, %w3\n\t"
/* Fold down to a uint16_t */
"tbz %w0, #16, 1f\n\t"
"mov %w2, #0xffff\n\t"
"sub %w0, %w0, %w2\n\t"
"tbz %w0, #16, 1f\n\t"
"sub %w0, %w0, %w2\n\t"
"\n1:\n\t"
/* Byteswap back, if applicable */
"tbz %w4, #0, 1f\n\t"
"rev16 %w0, %w0\n\t"
"\n1:\n\t"
/* Invert sum */
"eor %w0, %w0, #0xffff\n\t"
: "+r" ( sum ), "+r" ( data ), "=&r" ( discard_low ),
"=&r" ( discard_high )
: "r" ( pre ), "r" ( first ), "r" ( end - post ),
"r" ( post )
: "cc" );

return sum;
}
15 changes: 15 additions & 0 deletions src/arch/arm64/include/bits/tcpip.h
@@ -0,0 +1,15 @@
#ifndef _BITS_TCPIP_H
#define _BITS_TCPIP_H

/** @file
*
* Transport-network layer interface
*
*/

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

extern uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
size_t len );

#endif /* _BITS_TCPIP_H */

0 comments on commit 47931a4

Please sign in to comment.