Re: Convert to float and more
Re: Convert to float and more
- Subject: Re: Convert to float and more
- From: Sol Friedman <email@hidden>
- Date: Thu, 5 Jun 2003 16:53:38 -0700
Hi -
Here is some AltiVec code to convert 16-bit integers to floats. A
mono input stream is converted and written to 2 mono streams. The sample
buffers must be 16-byte aligned (both the input pointer and the output
pointers). If any of them isn't aligned, some additional maneuvers are necessary.
Below this code is code that handles a stereo interleaved input,
converting this to 2 mono output streams. If you're using 32 bit
integers in either case, the unpack instructions are unnecessary.
-Sol
// Altivec
// Convert 16-bit samples from src1 to float and copy them to dstl and dstr
// Convert a mono stream of 16-bit integer samples to floats and write the
// same converted data to two mono output streams.
//
//   framesToDo  number of samples to convert (any value, including 0)
//   src1        input samples; accessed with vector loads, so the pointer
//               must be 16-byte aligned
//   dstl, dstr  output buffers, each receiving framesToDo floats; accessed
//               with vector stores, so both must be 16-byte aligned
//
// Every sample is scaled by 2^-15 (vec_ctf with a scale of 15).
//
// Whole groups of 32 samples go through the unrolled vector loop; the
// remaining 0..31 samples are converted one at a time. The loop must not run
// on a partial group: framesToDo is unsigned, so subtracting 32 from a
// smaller value would wrap around to a huge count and overrun all three
// buffers.
void copyMonoVector(UInt32 framesToDo, SInt16 *src1, float *dstl, float *dstr) {
    vector signed short *v_inSamplesShort = (vector signed short *)(src1);
    vector float *v_outSamplesL = (vector float *)(dstl);
    vector float *v_outSamplesR = (vector float *)(dstr);
    vector signed short srcv0, srcv1, srcv2, srcv3;
    vector signed int v_inSamplesLong0, v_inSamplesLong1,
                      v_inSamplesLong2, v_inSamplesLong3;
    vector signed int v_inSamplesLong4, v_inSamplesLong5,
                      v_inSamplesLong6, v_inSamplesLong7;
    vector float dstv0, dstv1, dstv2, dstv3, dstv4, dstv5, dstv6, dstv7;
    SInt16 *tailIn;
    float *tailL;
    float *tailR;
    UInt32 i;

    // 32 samples per iteration: 4 source vectors of 8 shorts each
    while (framesToDo >= 32) {
        // load the source vectors
        srcv0 = *v_inSamplesShort++;
        srcv1 = *v_inSamplesShort++;
        srcv2 = *v_inSamplesShort++;
        srcv3 = *v_inSamplesShort++;

        // sign-extend the first 4 shorts of each source vector to 32 bits
        v_inSamplesLong0 = vec_unpackh(srcv0);
        v_inSamplesLong1 = vec_unpackh(srcv1);
        v_inSamplesLong2 = vec_unpackh(srcv2);
        v_inSamplesLong3 = vec_unpackh(srcv3);

        // sign-extend the second 4 shorts of each source vector to 32 bits
        v_inSamplesLong4 = vec_unpackl(srcv0);
        v_inSamplesLong5 = vec_unpackl(srcv1);
        v_inSamplesLong6 = vec_unpackl(srcv2);
        v_inSamplesLong7 = vec_unpackl(srcv3);

        // convert to floating point, dividing by 2^15
        dstv0 = vec_ctf(v_inSamplesLong0, 15);
        dstv1 = vec_ctf(v_inSamplesLong1, 15);
        dstv2 = vec_ctf(v_inSamplesLong2, 15);
        dstv3 = vec_ctf(v_inSamplesLong3, 15);
        dstv4 = vec_ctf(v_inSamplesLong4, 15);
        dstv5 = vec_ctf(v_inSamplesLong5, 15);
        dstv6 = vec_ctf(v_inSamplesLong6, 15);
        dstv7 = vec_ctf(v_inSamplesLong7, 15);

        // write all 8 result vectors to both left and right; the high half
        // of each source vector (dstv0..3) precedes its low half (dstv4..7)
        // so the samples stay in their original order
        *v_outSamplesL++ = dstv0;
        *v_outSamplesR++ = dstv0;
        *v_outSamplesL++ = dstv4;
        *v_outSamplesR++ = dstv4;
        *v_outSamplesL++ = dstv1;
        *v_outSamplesR++ = dstv1;
        *v_outSamplesL++ = dstv5;
        *v_outSamplesR++ = dstv5;
        *v_outSamplesL++ = dstv2;
        *v_outSamplesR++ = dstv2;
        *v_outSamplesL++ = dstv6;
        *v_outSamplesR++ = dstv6;
        *v_outSamplesL++ = dstv3;
        *v_outSamplesR++ = dstv3;
        *v_outSamplesL++ = dstv7;
        *v_outSamplesR++ = dstv7;

        framesToDo -= 32;
    }

    // scalar tail: the 0..31 samples that do not fill a whole vector group,
    // continuing from where the vector loop stopped
    tailIn = (SInt16 *)v_inSamplesShort;
    tailL = (float *)v_outSamplesL;
    tailR = (float *)v_outSamplesR;
    for (i = 0; i < framesToDo; i++) {
        // same 2^-15 scaling as vec_ctf(..., 15)
        float sample = (float)tailIn[i] * (1.0f / 32768.0f);
        tailL[i] = sample;
        tailR[i] = sample;
    }
}
// Convert an interleaved stereo stream of 16-bit integer samples (left
// sample first in each frame) to floats, de-interleaving it into two mono
// output streams.
//
//   framesToDo  number of stereo frames to convert (any value, including 0)
//   src1        interleaved input, 2 * framesToDo shorts; accessed with
//               vector loads, so the pointer must be 16-byte aligned
//   dstl, dstr  left / right output buffers, each receiving framesToDo
//               floats; accessed with vector stores, so both must be
//               16-byte aligned
//
// Every sample is scaled by 2^-15 (vec_ctf with a scale of 15).
//
// Whole groups of 32 frames go through the unrolled vector loop; the
// remaining 0..31 frames are converted one at a time. The loop must not run
// on a partial group: framesToDo is unsigned, so subtracting 32 from a
// smaller value would wrap around to a huge count and overrun all three
// buffers.
void copyStereoVector(UInt32 framesToDo, SInt16 *src1, float *dstl, float *dstr) {
    vector signed short *v_inSamplesShort = (vector signed short *)(src1);
    vector float *v_outSamplesL = (vector float *)(dstl);
    vector float *v_outSamplesR = (vector float *)(dstr);
    vector signed short srcv0, srcv1, srcv2, srcv3;
    vector signed short v_temp0, v_temp1, v_temp2, v_temp3, v_temp4,
                        v_temp5, v_temp6, v_temp7;
    vector signed int v_inSamplesLong0, v_inSamplesLong1,
                      v_inSamplesLong2, v_inSamplesLong3;
    vector signed int v_inSamplesLong4, v_inSamplesLong5,
                      v_inSamplesLong6, v_inSamplesLong7;
    vector float dstv0, dstv1, dstv2, dstv3, dstv4, dstv5, dstv6, dstv7;
    // byte-select masks for vec_perm over a pair of vectors (bytes 0..31):
    // perml keeps the even-numbered shorts, permr the odd-numbered shorts
    const vector unsigned char perml = (vector unsigned char) (0, 1, 4, 5,
        8, 9, 12, 13,
        16, 17, 20, 21, 24, 25, 28, 29);
    const vector unsigned char permr = (vector unsigned char) (2, 3, 6, 7,
        10, 11, 14, 15,
        18, 19, 22, 23, 26, 27, 30, 31);
    SInt16 *tailIn;
    float *tailL;
    float *tailR;
    UInt32 i;

    // 32 frames per iteration: 8 source vectors of 8 interleaved shorts each
    while (framesToDo >= 32) {
        // load raw interleaved vectors into temporary vector registers
        v_temp0 = *v_inSamplesShort++;
        v_temp1 = *v_inSamplesShort++;
        v_temp2 = *v_inSamplesShort++;
        v_temp3 = *v_inSamplesShort++;
        v_temp4 = *v_inSamplesShort++;
        v_temp5 = *v_inSamplesShort++;
        v_temp6 = *v_inSamplesShort++;
        v_temp7 = *v_inSamplesShort++;

        // first process the left channel: permuting each pair of vectors
        // with the left channel mask "perml" yields 8 left shorts
        srcv0 = vec_perm(v_temp0, v_temp1, perml);
        srcv1 = vec_perm(v_temp2, v_temp3, perml);
        srcv2 = vec_perm(v_temp4, v_temp5, perml);
        srcv3 = vec_perm(v_temp6, v_temp7, perml);

        // sign-extend the first 4 shorts of each vector to 32 bits
        v_inSamplesLong0 = vec_unpackh(srcv0);
        v_inSamplesLong1 = vec_unpackh(srcv1);
        v_inSamplesLong2 = vec_unpackh(srcv2);
        v_inSamplesLong3 = vec_unpackh(srcv3);

        // sign-extend the second 4 shorts of each vector to 32 bits
        v_inSamplesLong4 = vec_unpackl(srcv0);
        v_inSamplesLong5 = vec_unpackl(srcv1);
        v_inSamplesLong6 = vec_unpackl(srcv2);
        v_inSamplesLong7 = vec_unpackl(srcv3);

        // convert to floating point, dividing by 2^15
        dstv0 = vec_ctf(v_inSamplesLong0, 15);
        dstv1 = vec_ctf(v_inSamplesLong1, 15);
        dstv2 = vec_ctf(v_inSamplesLong2, 15);
        dstv3 = vec_ctf(v_inSamplesLong3, 15);
        dstv4 = vec_ctf(v_inSamplesLong4, 15);
        dstv5 = vec_ctf(v_inSamplesLong5, 15);
        dstv6 = vec_ctf(v_inSamplesLong6, 15);
        dstv7 = vec_ctf(v_inSamplesLong7, 15);

        // write out 8 result vectors to the left output; the high half of
        // each vector precedes its low half to keep the samples in order
        *v_outSamplesL++ = dstv0;
        *v_outSamplesL++ = dstv4;
        *v_outSamplesL++ = dstv1;
        *v_outSamplesL++ = dstv5;
        *v_outSamplesL++ = dstv2;
        *v_outSamplesL++ = dstv6;
        *v_outSamplesL++ = dstv3;
        *v_outSamplesL++ = dstv7;

        // now process the right channel: permuting each pair of vectors
        // with the right channel mask "permr" yields 8 right shorts
        srcv0 = vec_perm(v_temp0, v_temp1, permr);
        srcv1 = vec_perm(v_temp2, v_temp3, permr);
        srcv2 = vec_perm(v_temp4, v_temp5, permr);
        srcv3 = vec_perm(v_temp6, v_temp7, permr);

        // sign-extend the first 4 shorts of each vector to 32 bits
        v_inSamplesLong0 = vec_unpackh(srcv0);
        v_inSamplesLong1 = vec_unpackh(srcv1);
        v_inSamplesLong2 = vec_unpackh(srcv2);
        v_inSamplesLong3 = vec_unpackh(srcv3);

        // sign-extend the second 4 shorts of each vector to 32 bits
        v_inSamplesLong4 = vec_unpackl(srcv0);
        v_inSamplesLong5 = vec_unpackl(srcv1);
        v_inSamplesLong6 = vec_unpackl(srcv2);
        v_inSamplesLong7 = vec_unpackl(srcv3);

        // convert to floating point, dividing by 2^15
        dstv0 = vec_ctf(v_inSamplesLong0, 15);
        dstv1 = vec_ctf(v_inSamplesLong1, 15);
        dstv2 = vec_ctf(v_inSamplesLong2, 15);
        dstv3 = vec_ctf(v_inSamplesLong3, 15);
        dstv4 = vec_ctf(v_inSamplesLong4, 15);
        dstv5 = vec_ctf(v_inSamplesLong5, 15);
        dstv6 = vec_ctf(v_inSamplesLong6, 15);
        dstv7 = vec_ctf(v_inSamplesLong7, 15);

        // write out 8 result vectors to the right output
        *v_outSamplesR++ = dstv0;
        *v_outSamplesR++ = dstv4;
        *v_outSamplesR++ = dstv1;
        *v_outSamplesR++ = dstv5;
        *v_outSamplesR++ = dstv2;
        *v_outSamplesR++ = dstv6;
        *v_outSamplesR++ = dstv3;
        *v_outSamplesR++ = dstv7;

        framesToDo -= 32;
    }

    // scalar tail: the 0..31 frames that do not fill a whole vector group,
    // continuing from where the vector loop stopped
    tailIn = (SInt16 *)v_inSamplesShort;
    tailL = (float *)v_outSamplesL;
    tailR = (float *)v_outSamplesR;
    for (i = 0; i < framesToDo; i++) {
        // same 2^-15 scaling as vec_ctf(..., 15); left sample comes first
        tailL[i] = (float)tailIn[2 * i] * (1.0f / 32768.0f);
        tailR[i] = (float)tailIn[2 * i + 1] * (1.0f / 32768.0f);
    }
}
Sol Friedman
Professional Applications
Apple Computer, Inc.
_______________________________________________
coreaudio-api mailing list | email@hidden
Help/Unsubscribe/Archives:
http://www.lists.apple.com/mailman/listinfo/coreaudio-api
Do not post admin requests to the list. They will be ignored.