Looks like a very useful improvement.
Post by Sturla Moldenhttp://svn.scipy.org/svn/scipy/trunk/scipy/signal/lfilter.c.src
I got some inspiration for reusing some of my other C code to
reimplement scipy.signal.lfilter. I basically just had to write
static void lfilter_1d(double *restrict b, double *restrict a,
double *x, double *y, double *zi, npy_intp K, npy_intp N)
{
double Z[K];
double *restrict pz = zi, *restrict z = Z;
register npy_intp n, k;
for (n=0; n<N; n++) {
register double yn, xn = x[n];
yn = y[n] = b[0] * xn + pz[0];
for (k=0; k<K-1; k++)
z[k] = b[k+1] * xn + pz[k+1] - a[k+1]*yn;
z[K-1] = b[K]*xn - a[K]*yn;
double *tmp = pz;
pz = z;
z = tmp;
}
if (pz != zi) memcpy(zi, pz, K*sizeof(double)); }
1D array, 1000000 floats: 150%
2D array, 1000 x 1000 floats, axis=0: 80%
2D array, 1000 x 1000 floats, axis=1: 150%
It is faster than lfilter except for 2D with axis=0. For
2D with axis=1 it is probably faster than lfilter due to
copy-in copy-out optimization. For arrays with more than
1D array, 1000000 floats: 150%
2D array, 1000 x 1000 floats, axis=0: 160%
2D array, 1000 x 1000 floats, axis=1: 250%
Multithreading obviously does not help for filtering single
vector, as there is no work to share.
Some other improvements over signal.lfilter, apart from readable
* GIL is released. The C code is not littered with Python C
API calls that prevents this.
* Filtering can be done in reverse, i.e. no need for fliplr or
flipud for filtering backwards.
* Arrays can be filtered in-place.
* Cleaned up docstring.
Still it only work as a "proof of concept" with double precision
floats. Adding support for other dtypes would be easy, just
float
float _Complex
double _Complex
long double
long double _Complex
Regards,
Sturla Molden
# Copyright (C) 2009 Sturla Molden
import numpy as np
cimport numpy as np
cdef extern int _linear_filter( object a, object b,
object x, object y, object z,
int axis, int direction)
def lfilter(object B, object A, object X,
object zi=None,
object axis = None,
int direction=1,
"""
Filter data along one-dimension with an IIR or FIR filter.
Description
===========
Filter a data sequence, x, using a digital filter. This works for
many
fundamental data types (including Object type). The filter is a
direct
form II transposed implementation of the standard difference equation
(see "Algorithm").
=======
b : vector-like
The numerator coefficient vector in a 1-D sequence.
a : vector-like
The denominator coefficient vector in a 1-D sequence. If a[0]
is not 1, then both a and b are normalized by a[0].
x : array-like
An N-dimensional input array.
axis : int or None
The axis of the input data array along which to apply the
linear filter. The filter is applied to each subarray along
this axis. If axis is None, the filter is applied to x
flattened. Default: axis=None.
zi : array-like
Initial conditions for the filter delays. It is a vector
(or array of vectors for an N-dimensional input) of length
max(len(a),len(b)). If zi=None or is not given then initial
rest is assumed. If axis is None, zi should be a vector
of length K = max(M,N), where M=len(b)-1 and N=len(a)-1. If
an axis is not None, zi should either be a vector of length
K, or an array with same shape as x, except for zi.shape[axis]
which should be K. If zi is a vector, the same initial
conditions are applied to each vector along the specified
axis. Default: zi=None (initial rest).
See signal.lfiltic for more information.
direction : int
If negative, the filter is applied in reverse direction
along the axis. Default: direction=1 (filter forward).
inplace : bool
If True, allow x or zi to be modified inplace if possible.
Default: False.
Outputs: (y, {zf})
=======
y : ndarray
The output of the digital filter.
zf : ndarray
If zi is None, this is not returned, otherwise, zf holds the
final filter delay values.
==========
The filter function is implemented as a direct II transposed
structure.
This means that the filter implements
a[0]*y[n] = b[0]*x[n] + b[1]*x[n-1] + ... + b[nb]*x[n-nb]
- a[1]*y[n-1] - ... - a[na]*y[n-na]
y[m] = b[0]*x[m] + z[0,m-1]
z[0,m] = b[1]*x[m] + z[1,m-1] - a[1]*y[m]
...
z[n-3,m] = b[n-2]*x[m] + z[n-2,m-1] - a[n-2]*y[m]
z[n-2,m] = b[n-1]*x[m] - a[n-1]*y[m]
where m is the output sample number and n=max(len(a),len(b)) is the
model order.
The rational transfer function describing this filter in the
z-transform domain is
-1 -nb
b[0] + b[1]z + ... + b[nb] z
Y(z) = ---------------------------------- X(z)
-1 -na
a[0] + a[1]z + ... + a[na] z
"""
cdef np.ndarray b, a, x, y, z
cdef np.npy_intp i, ierr
cdef double tmp
cdef object K, zshape, iaxis
# get filter coeffs b, a
A = np.asarray(A)
B = np.asarray(B)
raise ValueError, 'a and b must be vectors'
K = max(len(A)-1,len(B)-1)
b = np.zeros(K+1)
a = np.zeros(K+1)
a[:A.shape[0]] = A[:]
b[:B.shape[0]] = B[:]
# normalize by a[0]
tmp = a[0]
a[i] /= tmp
b[i] /= tmp
# set up input signal
X = np.asarray(X, dtype=np.float64)
X = X.ravel()
raise ValueError, 'axis must be None or an integer >= 0'
raise ValueError, 'axis must be None or an integer >= 0'
x = X
# set up output signal
y = X
y = np.zeros(X.shape, dtype=np.float64)
# set up filter delay buffer
iaxis = 0 if (axis is None) else axis
# zi not specified, assume initial rest
zshape = list(X.shape)
zshape[iaxis] = K
z = np.zeros(zshape, dtype=np.float64)
zi = np.asarray(zi, dtype=np.float64)
# x and zi are both vectors
raise ValueError, 'length of zi must be K'
z = zi
z = zi.copy()
# x is nd array, zi is vector
# broadcast zi (cannot modify inplace)
raise ValueError, 'length of zi must be K'
zshape = list(X.shape)
zshape[iaxis] = K
z = np.zeros(zshape, dtype=np.float64)
zshape = [1] * X.ndim
zshape[iaxis] = K
z = z + zi.reshape(zshape)
# x and zi are both nd arrays
zshape = list(X.shape)
zshape[iaxis] = K
zshape = tuple(zshape)
raise ValueError, ('bad shape of zi: expected %r, got %r' %
(zshape,zi.shape))
z = zi
z = zi.copy()
# apply the linear filter
ierr = _linear_filter(a, b, x, y, z, <int> iaxis, direction)
# check for error conditions on return
raise MemoryError, 'malloc returned NULL'
raise ValueError, 'shape of x, y, and z did not match... should
never happen, debug please.'
raise ValueError, 'shape of a and b did not match... should never
happen, debug please.'
# return y or (y, z) depending on zi
# return (y if (zi is None) else (y, z)) ## Cython fails on this
return y
return (y,z)
/* Copyright (C) 2009 Sturla Molden */
#include <Python.h>
#include <numpy/arrayobject.h>
#include <string.h>
#include <stdlib.h>
typedef PyArrayObject ndarray; /* PyArrayObject is an ugly name */
typedef Py_ssize_t integer; /* Py_ssize_t of int for 64 bit support,
npy_intp is typedef'ed incorrectly. */
/* copy data to and from temporary work arrays */
static void copy_in(double *restrict y, double *restrict x, integer n,
integer s)
{
integer i;
char *tmp;
if (s == sizeof(double))
memcpy(y, x, n*sizeof(double));
else {
tmp = (char *)x;
for (i=0; i<n; i++) {
*y++ = *x;
tmp += s;
x = (double *)tmp;
}
}
}
static void copy_out(double *restrict y, double *restrict x, integer n,
integer s)
{
integer i;
char *tmp;
if (s == sizeof(double))
memcpy(y, x, n*sizeof(double));
else {
tmp = (char *)y;
for (i=0; i<n; i++) {
*y = *x++;
tmp += s;
y = (double *)tmp;
}
}
}
/* this computes the start adress for every vector along a dimension
(axis) of an ndarray */
inline char *datapointer(ndarray *a, integer *indices)
{
char *data = a->data;
int i;
integer j;
for (i=0; i < a->nd; i++) {
j = indices[i];
data += j * a->strides[i];
}
return data;
}
static double ** _get_axis_pointer(integer *indices, int axis,
ndarray *a, double **axis_ptr_array, int dim)
{
/* recursive axis pointer search for 4 dimensions or more */
integer i;
double *axis_ptr;
if (dim == a->nd) {
/* done recursing dimensions,
compute address of this axis */
axis_ptr = (double *) datapointer(a, indices);
*axis_ptr_array = axis_ptr;
return (axis_ptr_array + 1);
} else if (dim == axis) {
/* use first element only */
indices[dim] = 0;
axis_ptr_array = _get_axis_pointer(indices, axis, a, axis_ptr_array,
dim+1);
return axis_ptr_array;
} else {
/* iterate range of indices */
integer length = a->dimensions[dim];
for (i=0; i < length; i++) {
indices[dim] = i;
axis_ptr_array = _get_axis_pointer(indices, axis, a,
axis_ptr_array, dim+1);
}
return axis_ptr_array;
}
}
static double **get_axis_pointers(ndarray *a, int axis, integer *count)
{
integer indices[a->nd];
double **out, **tmp;
integer i, j;
j = 1;
for (i=0; i < a->nd; i++) {
if (i != axis)
j *= a->dimensions[i];
}
*count = j;
out = (double **) malloc(*count * sizeof(double*));
if (out == NULL) {
*count = 0;
return NULL;
}
tmp = out;
switch (a->nd) {
/* for one dimension we just return the data pointer */
*tmp = (double *)a->data;
break;
/* specialized for two dimensions */
switch (axis) {
for (i=0; i<a->dimensions[1]; i++)
*tmp++ = (double *)(a->data + i*a->strides[1]);
break;
for (i=0; i<a->dimensions[0]; i++)
*tmp++ = (double *)(a->data + i*a->strides[0]);
break;
}
break;
/* specialized for three dimensions */
switch (axis) {
for (i=0; i<a->dimensions[1]; i++)
for (j=0; j<a->dimensions[2]; j++)
*tmp++ = (double *)(a->data + i*a->strides[1] +
j*a->strides[2]);
break;
for (i=0; i<a->dimensions[0]; i++)
for (j=0; j<a->dimensions[2]; j++)
*tmp++ = (double *)(a->data + i*a->strides[0] +
j*a->strides[2]);
break;
for (i=0; i<a->dimensions[0]; i++)
for (j=0; j<a->dimensions[1]; j++)
*tmp++ = (double *)(a->data + i*a->strides[0] +
j*a->strides[1]);
}
break;
/* four or more dimensions: use recursion */
for (i=0; i<a->nd; i++) indices[i] = 0;
_get_axis_pointer(indices, axis, a, out, 0);
}
return out;
}
/* applies a linear filter along one dimension */
static void lfilter_1d( double *restrict b, double *restrict a,
double *x, double *y,
double *zi,
integer K, integer N)
{
double Z[K];
double *restrict pz = zi, *restrict z = Z;
register integer n, k;
for (n=0; n<N; n++) {
register double yn, xn = x[n];
yn = y[n] = b[0] * xn + pz[0];
for (k=0; k<K-1; k++)
z[k] = b[k+1] * xn + pz[k+1] - a[k+1]*yn;
z[K-1] = b[K]*xn - a[K]*yn;
double *tmp = pz;
pz = z;
z = tmp;
}
if (pz != zi)
memcpy(zi, pz, K*sizeof(double));
}
static void rev_lfilter_1d( double *restrict b, double *restrict a,
double *restrict x, double *restrict y,
double *zi,
integer K, integer N)
{
double Z[K];
double *restrict pz = zi, *restrict z = Z;
register integer n, k;
for (n=N-1; n >= 0; n--) {
register double yn, xn = x[n];
yn = y[n] = b[0] * xn + pz[0];
for (k=0; k<K-1; k++)
z[k] = b[k+1]*xn + pz[k+1] - a[k+1]*yn;
z[K-1] = b[K]*xn - a[K]*yn;
double *tmp = pz;
pz = z;
z = tmp;
}
if (pz != zi)
memcpy(zi, pz, K*sizeof(double));
}
typedef void (*lfilterfun_t)(double *restrict b, double *restrict a,
double *restrict x, double *restrict y,
double *zi, integer K, integer N);
int _linear_filter(ndarray *a, ndarray *b,
ndarray *x, ndarray *y, ndarray *z,
int axis,
int direction)
{
int retval = 0;
double *wx, *wy, *wz, *data_a, *data_b;
integer xcount, ycount, zcount;
integer xstride, ystride, zstride;
double *xdata, *ydata, *zdata;
double **x_axis_ptrs = NULL, **y_axis_ptrs = NULL, **z_axis_ptrs = NULL;
lfilterfun_t lfilter;
integer i, K, N;
/* release the GIL */
Py_BEGIN_ALLOW_THREADS
/* sanity check on a and b (these should never fail) */
if (a->nd != 1) goto error_ab;
if (b->nd != 1) goto error_ab;
if (b->dimensions[0] != a->dimensions[0]) goto error_ab;
data_a = (double *)a->data;
data_b = (double *)b->data;
K = a->dimensions[0] - 1;
/* sanity ckeck on x, y and z */
if ((x->nd != y->nd) || (y->nd != z->nd)) goto error_xyz;
for (int d=0; d < a->nd; d++) {
if (d == axis) continue;
if ((x->dimensions[d] != y->dimensions[d])
|| (y->dimensions[d] != z->dimensions[d])) goto error_xyz;
}
N = x->dimensions[axis];
/* extract axis pointers */
x_axis_ptrs = get_axis_pointers(x, axis, &xcount);
y_axis_ptrs = get_axis_pointers(y, axis, &ycount);
z_axis_ptrs = get_axis_pointers(z, axis, &zcount);
/* sanity check */
if (!x_axis_ptrs) goto error_malloc;
if (!y_axis_ptrs) goto error_malloc;
if (!z_axis_ptrs) goto error_malloc;
if ((xcount != ycount)||(ycount != zcount)) goto error_xyz;
/* all ok, we can start ... */
lfilter = direction < 0 ? rev_lfilter_1d : lfilter_1d;
xstride = x->strides[axis];
ystride = y->strides[axis];
zstride = z->strides[axis];
#pragma omp parallel \
firstprivate(lfilter, N, K, xcount, x_axis_ptrs, y_axis_ptrs,
z_axis_ptrs, \
xstride, ystride, zstride, data_b, data_a) \
private(i, xdata, ydata, zdata, wx, wy, wz) \
shared(retval) \
default(none)
{
if ((xstride==sizeof(double))
&& (ystride==sizeof(double))
&& (xstride==sizeof(double)))
{
#pragma omp for schedule(static)
for (i=0; i < xcount; i++) {
xdata = x_axis_ptrs[i];
ydata = y_axis_ptrs[i];
zdata = z_axis_ptrs[i];
/* strictly speaking we could be aliasing
xdata and ydata here, but it should not matter. */
lfilter(data_b, data_a, xdata, ydata, zdata, K, N);
}
} else {
/* axis is strided, use copy-in copy-out */
wx = (double *)malloc((2*N+K)*sizeof(double));
if (!wx) {
#pragma omp critical
retval = -1; /* error_malloc */
}
#pragma omp barrier
#pragma omp flush(retval)
if (retval < 0)
/* malloc failed in some thread */
goto filtering_complete;
wy = wx + N;
wz = wy + N;
#pragma omp for schedule(static)
for (i=0; i < xcount; i++) {
/* get pointe to the first element of the axis */
xdata = x_axis_ptrs[i];
ydata = y_axis_ptrs[i];
zdata = z_axis_ptrs[i];
/* copy to local contiguous buffers */
copy_in(wx, xdata, N, xstride);
copy_in(wz, zdata, K, zstride);
/* filter */
lfilter(data_b, data_a, wx, wy, wz, K, N);
/* copy data back */
copy_out(ydata, wy, N, ystride);
copy_out(zdata, wz, K, zstride);
}
if (wx) free(wx);
}
}
/* we are done... */
goto done;
/* error conditions */
retval = -3;
goto done;
retval = -2;
goto done;
retval = -1;
/* clean up and exit */
if (x_axis_ptrs) free(x_axis_ptrs);
if (y_axis_ptrs) free(y_axis_ptrs);
if (z_axis_ptrs) free(z_axis_ptrs);
/* get the GIL back and return */
Py_END_ALLOW_THREADS
return retval;
}
_______________________________________________
Scipy-dev mailing list
http://mail.scipy.org/mailman/listinfo/scipy-dev