Changeset 2850

Show
Ignore:
Timestamp:
10/20/99 00:19:39 (15 years ago)
Author:
ucacoxh
Message:

Replaced divisions in interpolation converter that were not powers
of two with a multiplication scaling factor and a power of 2
denominator. Improves performance 4-6 fold for upsampling by 3,5,6.

i.e. we had a sequence that looked like:

x[i] = 2 * last + 1 * new; x[i] /= 3;
x[i] = 1 * last + 2 * new; x[i] /= 3;
x[i] = new;

since 1/3 approximates to 5 / 16, now looks like:

x[i] = 10 * last + 5 * new; x[i] /= 16;
x[i] = 5 * last + 10 * new; x[i] /= 16;
x[i] = 15 * new; x[i] /= 16;

Have to scale the last entry too, so as not to put jumps in amplitude. I
vaguely recall reading that division by a power of 2 was quicker than the
rest. Took a good while to trace it though, since my initial guesses were
arithmetic pipelining and memory alignment ;-)

Not sure we really care about this since the time is trivial anyway,
but gave odd data points in performance measurement for thesis. For
the record, an UltraSPARC-II 296 MHz can upsample 330000 20ms frames
from 8k to 48k in 1 second. Okay, the sound quality is not great, but
that's still a staggering amount of maths.

Location:
rat/trunk
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • rat/trunk/MODS

    r2849 r2850  
    899899          update. 
    900900        - Unrolled loops in extrapolation converter.   
     901        - Replaced divisions in interpolation converter that were not powers 
     902          of two with a multiplication scaling factor and a power of 2  
     903          denominator.  Improves performance 4-6 fold for upsampling by 3,5,6. 
    901904 
    902905TODO -- They're features not bugs dammit! 
  • rat/trunk/convert_linear.c

    r2553 r2850  
    5151        loop = min(src_len/channels, dst_len/(channels*l->scale)); 
    5252 
     53        /* On some platforms divisions by powers of 2 is way quicker */ 
     54        /* than other divisions.  To improve interpolation perf.     */ 
     55        /* approximate fractions which are not powers of two         */ 
     56        /* i.e. 1 / 3 ->  5 / 16                                     */ 
     57        /*      1 / 5 ->  6 / 32                                     */ 
     58        /*      1 / 6 -> 11 / 64                                     */ 
     59 
    5360        switch (l->scale) { 
    5461        case 6: 
     
    5663                        register int il, ic; 
    5764                        il = *last; ic = *sp; 
    58                         r = 5 * il + 1 * ic; r /= 6; *dp = (sample)r; dp += channels; 
    59                         r = 4 * il + 2 * ic; r /= 6; *dp = (sample)r; dp += channels; 
    60                         r = 3 * il + 3 * ic; r /= 6; *dp = (sample)r; dp += channels; 
    61                         r = 2 * il + 4 * ic; r /= 6; *dp = (sample)r; dp += channels; 
    62                         r = 1 * il + 5 * ic; r /= 6; *dp = (sample)r; dp += channels; 
    63                         *dp = (sample)ic; dp += channels; 
     65 
     66                        r = 55 * il + 11 * ic; r /= 64; *dp = (sample)r; dp += channels; 
     67                        r = 44 * il + 22 * ic; r /= 64; *dp = (sample)r; dp += channels; 
     68                        r = 33 * il + 33 * ic; r /= 64; *dp = (sample)r; dp += channels; 
     69                        r = 22 * il + 44 * ic; r /= 64; *dp = (sample)r; dp += channels; 
     70                        r = 11 * il + 55 * ic; r /= 64; *dp = (sample)r; dp += channels; 
     71                        r =           66 * ic; r /= 64; *dp = (sample)r; dp += channels; 
    6472                        last = sp; 
    6573                        sp += channels; 
     
    7078                        register int il, ic; 
    7179                        il = *last; ic = *sp; 
    72                         r = 4 * il + 1 * ic; r /= 5; *dp = (sample)r; dp += channels; 
    73                         r = 3 * il + 2 * ic; r /= 5; *dp = (sample)r; dp += channels; 
    74                         r = 2 * il + 3 * ic; r /= 5; *dp = (sample)r; dp += channels; 
    75                         r = 1 * il + 4 * ic; r /= 5; *dp = (sample)r; dp += channels; 
    76                         *dp = (sample)ic; dp += channels; 
     80                        r = 24 * il +  6 * ic; r /= 32; *dp = (sample)r; dp += channels; 
     81                        r = 18 * il + 12 * ic; r /= 32; *dp = (sample)r; dp += channels; 
     82                        r = 12 * il + 18 * ic; r /= 32; *dp = (sample)r; dp += channels; 
     83                        r =  6 * il + 24 * ic; r /= 32; *dp = (sample)r; dp += channels; 
     84                        r =           30 * ic; r /= 32; *dp = (sample)r; dp += channels; 
    7785                        last = sp; 
    7886                        sp  += channels; 
     
    95103                        register int il, ic; 
    96104                        il = *last; ic = *sp; 
    97                         r = 2 * il + 1 * ic; r /= 3; *dp = (sample)r; dp += channels; 
    98                         r = 1 * il + 2 * ic; r /= 3; *dp = (sample)r; dp += channels; 
    99                         *dp = (sample)ic; dp += channels; 
     105                        r = 10 * il +  5 * ic; r /= 16; *dp = (sample)r; dp += channels; 
     106                        r =  5 * il + 10 * ic; r /= 16; *dp = (sample)r; dp += channels; 
     107                        r =           15 * ic; r /= 16; *dp = (sample)r; dp += channels; 
    100108                        last = sp; 
    101109                        sp  += channels;