diff -u nbsd/Makefile ./Makefile --- nbsd/Makefile Wed Apr 24 14:48:20 2002 +++ ./Makefile Wed Apr 24 14:49:36 2002 @@ -39,7 +39,8 @@ @echo "make aix-tk3play IBM AIX" @echo "make os2 IBM OS/2" @echo "make netbsd NetBSD" - @echo "make netbsd-arm32 NetBSD for strongarm" + @echo "make netbsd-arm32 NetBSD for strongarm (32bit fixedpoint)" + @echo "make netbsd-shark NetBSD for strongarm (64bit fixedpoint)" @echo "make netbsd-i386 NetBSD optimized for i386" @echo "make netbsd-i386-elf NetBSD optimized for i386, ELF format" @echo "make netbsd-m68k NetBSD optimized for m68k" @@ -392,6 +393,16 @@ -march=armv4 -mtune=strongarm -mcpu=strongarm \ -DREAL_IS_FIXED -DUSE_MMAP -DREAD_MMAP -DTERM_CONTROL \ -DNETBSD' \ + mpg123-make + +netbsd-shark: + $(MAKE) CC=cc LDFLAGS= \ + OBJECTS='decode.o dct64.o audio_sun.o term.o' \ + CFLAGS='$(CFLAGS) -Wall -O6 -fomit-frame-pointer \ + -funroll-all-loops -finline-functions -ffast-math \ + -march=armv4 -mtune=strongarm110 -mcpu=strongarm110 \ + -DREAL_IS_FIXED_ARMV4 -DUSE_MMAP -DREAD_MMAP \ + -DTERM_CONTROL -DNETBSD' \ mpg123-make solaris-gcc-esd: Only in ./: mpg123-64.patch diff -u nbsd/mpg123.h ./mpg123.h --- nbsd/mpg123.h Wed Apr 24 14:49:13 2002 +++ ./mpg123.h Wed Apr 24 14:49:41 2002 @@ -67,7 +67,87 @@ # define DOUBLE_TO_REAL(x) ((int)((x) * REAL_FACTOR)) # define REAL_TO_SHORT(x) ((x) >> REAL_RADIX) + # define REAL_MUL(x, y) (((long long)(x) * (long long)(y)) >> REAL_RADIX) + +#elif defined(REAL_IS_FIXED_ARMV4) +# define REAL_IS_FIXED /* presumably also enables the generic fixed-point code keyed on REAL_IS_FIXED elsewhere - TODO confirm */ +# define real long long + +# define REAL_RADIX 32 /* number of fractional bits in a real */ +# define REAL_FACTOR (4294967296.0) /* 2^REAL_RADIX, used to scale a double into a real */ + +# define REAL_PLUS_32767 ( 32767LL << REAL_RADIX ) /* 32767.0 as a real */ +# define REAL_MINUS_32768 ( -32768LL << REAL_RADIX ) /* -32768.0 as a real; NOTE(review): left-shifting a negative value is undefined in ISO C, so this relies on compiler behaviour - confirm */ + +# define DOUBLE_TO_REAL(x) ((long long)((x) * REAL_FACTOR)) +# define REAL_TO_SHORT(x) ((x) >> REAL_RADIX) /* drops the fractional bits */ + +/* + * "Reals" are signed fixed-point values held in a long long: 15 bits before the point, 32 bits (REAL_RADIX) after.
+ * Multiplication (real_mul() below) is done like so: + * + * if the integer part of x fits in 8 signed bits (-2^7 <= x < 2^7), shift x >> 7, else shift x >> 15 + * if the integer part of y fits in 8 signed bits (-2^7 <= y < 2^7), shift y >> 7, else shift y >> 15 + * multiply x * y, keeping the low 64 bits of the product (just avoids clipping MSB) + * shift the result >> 2 bits, plus 8 more for each operand that was only shifted >> 7 (2, 10 or 18), to make a total shift of 32 bits + * + * I'd do it in C, but gcc optimises it rather poorly (2K stack + * frames, anyone?). Hence the bit of (ARMv4) asm magic below. + * (Unfortunately, the CLZ opcode, which would make for a much neater + * solution to this, doesn't appear until ARMv5) + * + * (tjd April 2002) + */ + +# define REAL_MUL(x, y) real_mul((x),(y)) + +static __inline__ long long real_mul(long long x, long long y) { + const register long long _x asm("r0") = (x); /* the asm below treats r0/r1 as the low/high words of x */ + const register long long _y asm("r2") = (y); /* likewise r2/r3 as the low/high words of y */ + register long long rv asm("r0"); /* the product is left in r0/r1, the registers x arrived in */ + __asm__ __volatile__ (" +@ we'll need to shift the result at least >> 2 + mov r4, #2 +@ both args >> 7, + mov r0, r0, lsr #7 + mov r2, r2, lsr #7 + orr r0, r0, r1, lsl #25 + orr r2, r2, r3, lsl #25 + mov r1, r1, asr #7 + mov r3, r3, asr #7 +@ check if x needs further shiftage + cmp r1, #0 + cmnne r1, #1 + addeq r4, r4, #8 +@ shift x if needed + movne r0, r0, lsr #8 + orrne r0, r0, r1, lsl #24 + movne r1, r1, asr #8 +@ check if y needs further shiftage + cmp r3, #0 + cmnne r3, #1 + addeq r4, r4, #8 +@ shift y if needed + movne r2, r2, lsr #8 + orrne r2, r2, r3, lsl #24 + movne r3, r3, asr #8 +@ multiply (ignoring overflow) + mul r3, r0, r3 + mla r3, r1, r2, r3 + umull r0, r1, r2, r0 + add r1, r1, r3 +@ shift the result the appropriate amount + rsb r3, r4, #32 + mov r0, r0, lsr r4 + orr r0, r0, r1, lsl r3 + mov r1, r1, asr r4" + : "=r" (rv) + : "0" (_x), "r" (_y) + : "r1", "r3", "r4", "cc"); /* r4 is the scratch register holding the final shift count. NOTE(review): r1 and r3 are listed as clobbers but appear to be the high words of the _x/_y operands, r2/r3 are modified although _y is an input-only operand, and the template is a string literal spanning several source lines; later GCC releases are stricter about all three - confirm against the compiler in use */ + return rv; +} + #else # define real double