Merged master

2017-04-12 23:41:58 +02:00 · 2017-04-12 23:41:58 +02:00 · f55c4c35c3
commit f55c4c35c3
parent 6aa55f8195 47333b6690
27 changed files with 9021 additions and 3178 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,9 @@
 csdr
 nmux
 ddcd
 *.o
 *.so
 tags
 dumpvect.*.vect
 grc_tests/top_block.py
 *.swp
--- a/20
+++ b/20
@ -26,8 +26,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 LIBSOURCES =  fft_fftw.c libcsdr_wrapper.c
 #SOURCES = csdr.c $(LIBSOURCES)
 cpufeature = $(if $(findstring $(1),$(shell cat /proc/cpuinfo)),$(2))
@ -41,29 +39,39 @@ PARAMS_LOOPVECT = -O3 -ffast-math -fdump-tree-vect-details -dumpbase dumpvect
 PARAMS_LIBS = -g -lm -lrt -lfftw3f -DUSE_FFTW -DLIBCSDR_GPL -DUSE_IMA_ADPCM
 PARAMS_SO = -fpic
 PARAMS_MISC = -Wno-unused-result
 #DEBUG_ON = 0 #debug is always on by now (anyway it could be compiled with `make DEBUG_ON=1`)
 #PARAMS_DEBUG = $(if $(DEBUG_ON),-g,)
 FFTW_PACKAGE = fftw-3.3.3
 SO_EXT = $(if $(findstring CYGWIN,$(shell uname -a)),dll,so)
 SO_PATH = $(if $(findstring CYGWIN,$(shell uname -a)),/bin,/usr/lib)
-all: clean-vect
+# Command-style targets that never name a file they create. Declaring them phony
+# keeps a stray file called e.g. "all" or "install" from silently masking the rule.
+.PHONY: all arm-cross clean-vect clean install uninstall
 all: csdr nmux
 # Shared library with the DSP routines. Every library source and header is listed
 # as a prerequisite, although only $(LIBSOURCES) is handed to the compiler.
 # The recipe also clears old vectorizer dumps before compiling and feeds the new
 # ones to ./parsevect afterwards (the leading '-' lets the build continue if that
 # helper fails).
 # NOTE(review): the target is spelled libcsdr.so while the recipe writes
 # libcsdr.$(SO_EXT); the two names differ whenever SO_EXT expands to dll.
 libcsdr.so: fft_fftw.c fft_rpi.c libcsdr_wrapper.c libcsdr.c libcsdr_gpl.c fastddc.c fastddc.h  fft_fftw.h  fft_rpi.h  ima_adpcm.h  libcsdr_gpl.h  libcsdr.h  predefined.h
 	@echo NOTE: you may have to manually edit Makefile to optimize for your CPU \(especially if you compile on ARM, please edit PARAMS_NEON\).
 	@echo Auto-detected optimization parameters: $(PARAMS_SIMD)
 	@echo
 	rm -f dumpvect*.vect
 	gcc -std=gnu99 $(PARAMS_LOOPVECT) $(PARAMS_SIMD) $(LIBSOURCES) $(PARAMS_LIBS) $(PARAMS_MISC) -fpic -shared -o libcsdr.$(SO_EXT)
 	-./parsevect dumpvect*.vect
 # Command line tool; linked against the freshly built library in the current directory (-L. -lcsdr).
 csdr: csdr.c libcsdr.so
 	gcc -std=gnu99 $(PARAMS_LOOPVECT) $(PARAMS_SIMD) csdr.c $(PARAMS_LIBS) -L. -lcsdr $(PARAMS_MISC) -o csdr
 # C++ tool that additionally links pthread. It is not a prerequisite of `all`,
 # so it is only built on request (`make ddcd`).
 ddcd: ddcd.cpp libcsdr.so ddcd.h
 	g++ $(PARAMS_LOOPVECT) $(PARAMS_SIMD) ddcd.cpp $(PARAMS_LIBS) -L. -lcsdr -lpthread $(PARAMS_MISC) -o ddcd
 # C++ tool built from nmux.cpp and tsmpool.cpp; also links libcsdr and pthread.
 nmux: nmux.cpp libcsdr.so nmux.h tsmpool.cpp tsmpool.h
 	g++ $(PARAMS_LOOPVECT) $(PARAMS_SIMD) nmux.cpp tsmpool.cpp $(PARAMS_LIBS) -L. -lcsdr -lpthread $(PARAMS_MISC) -o nmux
 arm-cross: clean-vect
 	#note: this doesn't work since having added FFTW
 	arm-linux-gnueabihf-gcc -std=gnu99 -O3 -fshort-double -ffast-math -dumpbase dumpvect-arm -fdump-tree-vect-details -mfloat-abi=softfp -march=armv7-a -mtune=cortex-a9 -mfpu=neon -mvectorize-with-neon-quad -Wno-unused-result -Wformat=0 $(SOURCES) -lm -o ./csdr
 clean-vect:
 	rm -f dumpvect*.vect
 # Remove every binary this Makefile can build (ddcd included, although it is not
 # part of `all`); the vectorizer dumps are removed through the clean-vect prerequisite.
 clean: clean-vect
-	rm -f libcsdr.$(SO_EXT) csdr
+	rm -f libcsdr.$(SO_EXT) csdr nmux ddcd
 install:
 	install -m 0755 libcsdr.$(SO_EXT) $(SO_PATH)
 	install -m 0755 csdr /usr/bin
 	install -m 0755 csdr-fm /usr/bin
 	install -m 0755 nmux /usr/bin
 	-ldconfig
 # Remove everything the install target copies. nmux is installed to /usr/bin as
 # well, so it has to be removed here too. -f keeps the recipe going when one of
 # the files is already gone (e.g. an older install that predates nmux).
 uninstall:
 	rm -f $(SO_PATH)/libcsdr.$(SO_EXT) /usr/bin/csdr /usr/bin/csdr-fm /usr/bin/nmux
@ -85,7 +93,7 @@ emcc-get-deps:
 	emmake make; \
 	emmake make install
 emcc:
-	emcc -O3 -Isdr.js/$(FFTW_PACKAGE)/api -Lsdr.js/$(FFTW_PACKAGE)/emscripten-lib -o sdr.js/sdrjs-compiled.js fft_fftw.c libcsdr_wrapper.c -DLIBCSDR_GPL -DUSE_IMA_ADPCM -DUSE_FFTW -lfftw3f -s EXPORTED_FUNCTIONS="`python sdr.js/exported_functions.py`"
+	emcc -O3 -Isdr.js/$(FFTW_PACKAGE)/api -Lsdr.js/$(FFTW_PACKAGE)/emscripten-lib -o sdr.js/sdrjs-compiled.js fft_fftw.c libcsdr_wrapper.c -s TOTAL_MEMORY=67108864 -DLIBCSDR_GPL -DUSE_IMA_ADPCM -DUSE_FFTW -lfftw3f -s EXPORTED_FUNCTIONS="`python sdr.js/exported_functions.py`"
 	cat sdr.js/sdrjs-header.js sdr.js/sdrjs-compiled.js sdr.js/sdrjs-footer.js > sdr.js/sdr.js
 emcc-beautify:
 	bash -c 'type js-beautify >/dev/null 2>&1; if [ $$? -eq 0 ]; then js-beautify sdr.js/sdr.js >sdr.js/sdr.js.beautiful; mv sdr.js/sdr.js.beautiful sdr.js/sdr.js; fi'
--- a/README.md
+++ b/README.md
@ -205,7 +205,7 @@ Internally, a sine and cosine wave is generated to perform this function, and th
 	shift_addition_cc <rate>
-Operation is the same as with `shift_math_cc`.
+Operation is the same as for `shift_math_cc`.
 Internally, this function uses trigonometric addition formulas to generate sine and cosine, which is a bit faster. (About 4 times on the machine I have tested it on.)
@ -219,6 +219,20 @@ Operation is the same as with `shift_math_cc`.
 Internally, this function uses a look-up table (LUT) to recall the values of the sine function (for the first quadrant).
 The higher the table size is, the smaller the phase error is.
 	shift_addfast_cc <rate>
 Operation is the same as for `shift_math_cc`.
 Internally, this function uses a NEON-accelerated algorithm on capable systems, so it is advised to use this one on ARM boards.
 	shift_unroll_cc <rate>
 Operation is the same as for `shift_math_cc`.
 This uses a modified algorithm that first stores a vector of sine and cosine values for given phase differences.
 The loop in this function unrolls quite well if compiled on a PC. It was the fastest one on an i7 CPU during the tests.
 	decimating_shift_addition_cc <rate> [decimation]
 It shifts the input signal in the frequency domain, and also decimates it, without filtering. It will be useful as a part of the FFT channelizer implementation (to be done).
@ -298,11 +312,16 @@ The output sample rate will be `interpolation / decimation × input_sample_rate`
 `transition_bw` and `window` are the parameters of the filter.
-	fractional_decimator_ff <decimation_rate> [transition_bw [window]]
+	fractional_decimator_ff <decimation_rate> [num_poly_points ( [transition_bw [window]] | --prefilter )]
 It can decimate by a floating point ratio.
-`transition_bw` and `window` are the parameters of the filter.
+It uses Lagrange interpolation, where `num_poly_points` (12 by default) input samples are taken into consideration while calculating one output sample. 
 It can filter the signal with an anti-aliasing FIR filter before applying the Lagrange interpolation. This filter is inactive by default, but can be activated by:
 * passing only the `transition_bw`, or both the `transition_bw` and the `window` parameters of the filter,
 * using the `--prefilter` switch after `num_poly_points` to switch this filter on with the default parameters.
 	bandpass_fir_fft_cc <low_cut> <high_cut> <transition_bw> [window]
@ -310,6 +329,10 @@ It performs a bandpass FIR filter on complex samples, using FFT and the overlap-
 Parameters are described under `firdes_bandpass_c` and `firdes_lowpass_f`.
 	old_fractional_decimator_ff <decimation_rate> [transition_bw [window]]
 This is the deprecated, old version of `fractional_decimator_ff` (only uses linear interpolation, its filter cuts at 59% of the passband).
 	agc_ff [hang_time [reference [attack_rate [decay_rate [max_gain [attack_wait [filter_alpha]]]]]]]
 It is an automatic gain control function.
@ -501,6 +524,15 @@ To remove *sdr.js* and the compiled dependencies:
 	make emcc-clean
 ## [nmux] (#nmux)
 The repo also contains a command line tool called `nmux`, which is a TCP stream multiplexer. It reads data from the standard input, and sends it to each client connected through TCP sockets. Available command line options are:
 * `--port (-p), --address (-a)`: TCP port and address to listen on.
 * `--bufsize (-b), --bufcnt (-n)`: Internal buffer size and count.
 * `--help (-h)`: Show help message.
 `nmux` was originally written for use in OpenWebRX.
 ## [Licensing] (#licensing)
 Most of the code of `libcsdr` is under BSD license.  
--- a/csdr.c
+++ b/csdr.c
@ -48,7 +48,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "ima_adpcm.h"
 #include <sched.h>
 #include <math.h>
 #include <strings.h>
 #include <errno.h>
 #include "fastddc.h"
 char usage[]=
 "csdr - a simple commandline tool for Software Defined Radio receiver DSP.\n\n"
@ -95,7 +97,8 @@ char usage[]=
 "    agc_ff [hang_time [reference [attack_rate [decay_rate [max_gain [attack_wait [filter_alpha]]]]]]]\n"
 "    fastagc_ff [block_size [reference]]\n"
 "    rational_resampler_ff <interpolation> <decimation> [transition_bw [window]]\n"
-"    fractional_decimator_ff <decimation_rate> [transition_bw [window]]\n"
+"    old_fractional_decimator_ff <decimation_rate> [transition_bw [window]]\n"
 "    fractional_decimator_ff <decimation_rate> [num_poly_points ( [transition_bw [window]] | --prefilter )]\n"
 "    fft_cc <fft_size> <out_of_every_n_samples> [window [--octave] [--benchmark]]\n"
 "    logpower_cf [add_db]\n"
 "    fft_benchmark <fft_size> <fft_cycles> [--benchmark]\n"
@ -133,13 +136,7 @@ int bigbufs = 0;
 //change on on 2015-08-29: we don't yield at all. fread() will do it if it blocks
 #define YIELD_EVERY_N_TIMES 3
 //#define TRY_YIELD if(++yield_counter%YIELD_EVERY_N_TIMES==0) sched_yield()
 #define TRY_YIELD
 #ifdef __CYGWIN__
 #pragma message "we go the cyg way"
 #undef TRY_YIELD
 int flush_cntr = 0;
 #define TRY_YIELD fflush(stdout); sched_yield()
 #endif
 //unsigned yield_counter=0;
 int badsyntax(char* why)
@ -191,6 +188,22 @@ int init_fifo(int argc, char *argv[])
 			fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 			return fd;
 		}
 		else if(!strcmp(argv[2],"--fd"))  
 		{
 			//to use this:
 			//1. Create a pipe(pipedesc) in your process.
 			//2. fork() and execl() your process to run csdr, and give pipedesc[0] as parameter after --fd 
 			//  Note: when forking, the child process will get a copy of the file descriptor table! That's why this 
 			//  works at all, as file descriptor indexes are normally not transferable between processes, except for a *NIX socket way which is quite complicated... 
 			//3. From your parent process, write into pipedesc[1].
 			//This is implemented in ddcd, check there to see how to do it!
 			int fd;
 			if(sscanf(argv[3], "%d",&fd)<=0) return 0;
 			fprintf(stderr,"csdr: fd control mode on, fd=%d\n", fd);
 			int flags = fcntl(fd, F_GETFL, 0);
 			fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 			return fd;
 		}
 	}
 	return 0;
 }
@ -616,6 +629,7 @@ int main(int argc, char *argv[])
 		{
 			FEOF_CHECK;
 			if(!FREAD_C) break;
 			starting_phase=shift_math_cc((complexf*)input_buffer, (complexf*)output_buffer, the_bufsize, rate, starting_phase);
 			FWRITE_C;
 			TRY_YIELD;
 		}
@ -649,6 +663,105 @@ int main(int argc, char *argv[])
 		return 0;
 	}
 	// shift_addfast_cc <rate>
 	// Runs shift_addfast_cc() over the complex samples in input_buffer and writes the
 	// result from output_buffer. The rate comes from argv[2], or from a control
 	// fifo/fd when init_fifo() returns a nonzero descriptor.
 	if(!strcmp(argv[1],"shift_addfast_cc"))
 	{
 		bigbufs=1;
 		float starting_phase=0;
 		float rate;
 		int fd;
 		//the assignment inside the condition is intentional: init_fifo() returns 0 when no control channel was requested
 		if(fd=init_fifo(argc,argv))
 		{
 			//control mode: poll every 10 ms until the first rate value arrives
 			while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000);
 		}
 		else
 		{
 			if(argc<=2) return badsyntax("need required parameter (rate)"); 
 			//NOTE(review): the sscanf result is not checked, so rate stays uninitialized if argv[2] is not a number
 			sscanf(argv[2],"%g",&rate);
 		}
 		if(!sendbufsize(initialize_buffers())) return -2;
 		//outer loop: one iteration per rate value; the shifter state is rebuilt each time
 		for(;;)
 		{
 			shift_addfast_data_t data=shift_addfast_init(rate);
 			fprintf(stderr,"shift_addfast_cc: reinitialized to %g\n",rate);
 			int remain, current_size;
 			float* ibufptr;
 			float* obufptr;
 			//inner loop: one iteration per buffer of the_bufsize samples
 			for(;;)
 			{
 				//FEOF_CHECK, FREAD_C, FWRITE_C and TRY_YIELD are macros defined outside this block;
 				//presumably FEOF_CHECK ends the program at end of input -- TODO confirm
 				FEOF_CHECK;
 				if(!FREAD_C) break;
 				remain=the_bufsize;
 				ibufptr=input_buffer;
 				obufptr=output_buffer;
 				//hand the buffer to shift_addfast_cc() in pieces of at most 1024 samples,
 				//carrying the returned phase over from one piece to the next
 				while(remain)
 				{
 					current_size=(remain>1024)?1024:remain;
 					starting_phase=shift_addfast_cc((complexf*)ibufptr, (complexf*)obufptr, current_size, &data, starting_phase);
 					//the pointers are float*, so advance two floats per sample (assumes complexf is a pair of floats -- TODO confirm)
 					ibufptr+=current_size*2;
 					obufptr+=current_size*2;
 					remain-=current_size;
 				}
 				FWRITE_C;
 				//a new rate on the control channel leaves the inner loop, so the outer loop reinitializes with it
 				if(read_fifo_ctl(fd,"%g\n",&rate)) break;
 				TRY_YIELD;
 			}
 		}
 		//the outer loop has no break, so this statement is not reached by falling out of it
 		return 0;
 	}
 	// shift_unroll_cc <rate>
 	// Same command skeleton as shift_addfast_cc, but the work is done by
 	// shift_unroll_cc() with state created by shift_unroll_init(rate, 1024).
 	// The rate comes from argv[2], or from a control fifo/fd when init_fifo()
 	// returns a nonzero descriptor.
 	if(!strcmp(argv[1],"shift_unroll_cc"))
 	{
 		bigbufs=1;
 		float starting_phase=0;
 		float rate;
 		int fd;
 		//the assignment inside the condition is intentional: init_fifo() returns 0 when no control channel was requested
 		if(fd=init_fifo(argc,argv))
 		{
 			//control mode: poll every 10 ms until the first rate value arrives
 			while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000);
 		}
 		else
 		{
 			if(argc<=2) return badsyntax("need required parameter (rate)"); 
 			//NOTE(review): the sscanf result is not checked, so rate stays uninitialized if argv[2] is not a number
 			sscanf(argv[2],"%g",&rate);
 		}
 		if(!sendbufsize(initialize_buffers())) return -2;
 		//outer loop: one iteration per rate value; the state is rebuilt each time
 		for(;;)
 		{
 			//the second argument (1024) matches the largest piece size used in the inner while loop below
 			//NOTE(review): a new state is created on every retune; whether the old one owns memory that needs releasing is not visible here
 			shift_unroll_data_t data=shift_unroll_init(rate, 1024);
 			fprintf(stderr,"shift_unroll_cc: reinitialized to %g\n",rate);
 			int remain, current_size;
 			float* ibufptr;
 			float* obufptr;
 			//inner loop: one iteration per buffer of the_bufsize samples
 			for(;;)
 			{
 				//FEOF_CHECK, FREAD_C, FWRITE_C and TRY_YIELD are macros defined outside this block;
 				//presumably FEOF_CHECK ends the program at end of input -- TODO confirm
 				FEOF_CHECK;
 				if(!FREAD_C) break;
 				remain=the_bufsize;
 				ibufptr=input_buffer;
 				obufptr=output_buffer;
 				//process the buffer in pieces of at most 1024 samples, carrying the phase across pieces
 				while(remain)
 				{
 					current_size=(remain>1024)?1024:remain;
 					starting_phase=shift_unroll_cc((complexf*)ibufptr, (complexf*)obufptr, current_size, &data, starting_phase);
 					//the pointers are float*, so advance two floats per sample (assumes complexf is a pair of floats -- TODO confirm)
 					ibufptr+=current_size*2;
 					obufptr+=current_size*2;
 					remain-=current_size;
 				}
 				FWRITE_C;
 				//a new rate on the control channel leaves the inner loop, so the outer loop reinitializes with it
 				if(read_fifo_ctl(fd,"%g\n",&rate)) break;
 				TRY_YIELD;
 			}
 		}
 		//the outer loop has no break, so this statement is not reached by falling out of it
 		return 0;
 	}
 #ifdef LIBCSDR_GPL
 	if(!strcmp(argv[1],"decimating_shift_addition_cc"))
 	{
@ -861,7 +974,7 @@ int main(int argc, char *argv[])
 		{
 			FEOF_CHECK;
 			FREAD_R;
-			for(int i=0; i<the_bufsize;i++) fprintf(stderr, "%g ",input_buffer[i]);
+			for(int i=0; i<the_bufsize;i++) printf("%g ",input_buffer[i]);
 			TRY_YIELD;
 		}
@ -948,7 +1061,7 @@ int main(int argc, char *argv[])
 		padded_taps_length = taps_length+(NEON_ALIGNMENT/4)-1 - ((taps_length+(NEON_ALIGNMENT/4)-1)%(NEON_ALIGNMENT/4));
 		fprintf(stderr,"padded_taps_length = %d\n", padded_taps_length);
-		taps = (float*) (float*)malloc(padded_taps_length+NEON_ALIGNMENT);
+		taps = (float*) (float*)malloc((padded_taps_length+NEON_ALIGNMENT)*sizeof(float));
 		fprintf(stderr,"taps = %x\n", taps);
 		taps =  (float*)((((unsigned)taps)+NEON_ALIGNMENT-1) & ~(NEON_ALIGNMENT-1));
 		fprintf(stderr,"taps = %x\n", taps);
@ -1211,6 +1324,68 @@ int main(int argc, char *argv[])
 		float rate;
 		sscanf(argv[2],"%g",&rate);
 		int num_poly_points = 12;
 		if(argc>=4) sscanf(argv[3],"%d",&num_poly_points);
 		if(num_poly_points&1) return badsyntax("num_poly_points should be even");
 		if(num_poly_points<2) return badsyntax("num_poly_points should be >= 2");
 		int use_prefilter = 0;
 		float transition_bw=0.03;
 		window_t window = WINDOW_DEFAULT;
 		if(argc>=5)
 		{
 			if(!strcmp(argv[4], "--prefilter")) 
 			{
 				fprintf(stderr, "fractional_decimator_ff: using prefilter with default values\n");
 				use_prefilter = 1;
 			}
 			else 
 			{
 				sscanf(argv[4],"%g",&transition_bw);
 				if(argc>=6) window = firdes_get_window_from_string(argv[5]);
 			}
 		}
 		fprintf(stderr,"fractional_decimator_ff: use_prefilter = %d, num_poly_points = %d, transition_bw = %g, window = %s\n", 
 			use_prefilter, num_poly_points, transition_bw, firdes_get_string_from_window(window));
 		if(!initialize_buffers()) return -2;
 		sendbufsize(the_bufsize / rate);
 		if(rate==1) clone_(the_bufsize); //copy input to output in this special case (and stick in this function).
 		//Generate filter taps
 		int taps_length = 0;
 		float* taps = NULL;
 		if(use_prefilter)
 		{
 			taps_length = firdes_filter_len(transition_bw);
 			fprintf(stderr,"fractional_decimator_ff: taps_length = %d\n",taps_length);
 			taps = (float*)malloc(sizeof(float)*taps_length);
 			firdes_lowpass_f(taps, taps_length, 0.5/(rate-transition_bw), window); //0.6 const to compensate rolloff
 			//for(int=0;i<taps_length; i++) fprintf(stderr,"%g ",taps[i]);
 		}
 		else fprintf(stderr,"fractional_decimator_ff: not using taps\n");
 		fractional_decimator_ff_t d = fractional_decimator_ff_init(rate, num_poly_points, taps, taps_length); 
 		for(;;)
 		{
 			FEOF_CHECK;
 			if(d.input_processed==0) d.input_processed=the_bufsize;
 			else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(the_bufsize-d.input_processed));
 			fread(input_buffer+(the_bufsize-d.input_processed), sizeof(float), d.input_processed, stdin);
 			fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, &d);
 			fwrite(output_buffer, sizeof(float), d.output_size, stdout);
 			//fprintf(stderr, "os = %d, ip = %d\n", d.output_size, d.input_processed);
 			TRY_YIELD;
 		}
 	}
 	if(!strcmp(argv[1],"old_fractional_decimator_ff"))
 	{
 		//Process the params
 		if(argc<=2) return badsyntax("need required parameters (rate)");
 		float rate;
 		sscanf(argv[2],"%g",&rate);
 		float transition_bw=0.03;
 		if(argc>=4) sscanf(argv[3],"%g",&transition_bw);
@ -1219,7 +1394,7 @@ int main(int argc, char *argv[])
 		{
 			window = firdes_get_window_from_string(argv[4]);
 		}
-		else fprintf(stderr,"fractional_decimator_ff: window = %s\n",firdes_get_string_from_window(window));
+		else fprintf(stderr,"old_fractional_decimator_ff: window = %s\n",firdes_get_string_from_window(window));
 		if(!initialize_buffers()) return -2;
 		sendbufsize(the_bufsize / rate);
@ -1228,19 +1403,19 @@ int main(int argc, char *argv[])
 		//Generate filter taps
 		int taps_length = firdes_filter_len(transition_bw);
-		fprintf(stderr,"fractional_decimator_ff: taps_length = %d\n",taps_length);
+		fprintf(stderr,"old_fractional_decimator_ff: taps_length = %d\n",taps_length);
 		float* taps = (float*)malloc(sizeof(float)*taps_length);
 		firdes_lowpass_f(taps, taps_length, 0.59*0.5/(rate-transition_bw), window); //0.6 const to compensate rolloff
 		//for(int=0;i<taps_length; i++) fprintf(stderr,"%g ",taps[i]);
-		static fractional_decimator_ff_t d; //in .bss => initialized to zero
+		static old_fractional_decimator_ff_t d; //in .bss => initialized to zero
 		for(;;)
 		{
 			FEOF_CHECK;
 			if(d.input_processed==0) d.input_processed=the_bufsize;
 			else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(the_bufsize-d.input_processed));
 			fread(input_buffer+(the_bufsize-d.input_processed), sizeof(float), d.input_processed, stdin);
-			d = fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, rate, taps, taps_length, d);
+			d = old_fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, rate, taps, taps_length, d);
 			fwrite(output_buffer, sizeof(float), d.output_size, stdout);
 			TRY_YIELD;
 		}
@ -1283,6 +1458,8 @@ int main(int argc, char *argv[])
 		FFT_PLAN_T* plan=make_fft_c2c(fft_size, windowed, output, 1, benchmark);
 		if(benchmark) fprintf(stderr," done\n");
 		if(octave) printf("setenv(\"GNUTERM\",\"X11 noraise\");y=zeros(1,%d);semilogy(y,\"ydatasource\",\"y\");\n",fft_size);
 		float *windowt;
 		windowt = precalculate_window(fft_size, window);
 		for(;;)
 		{
 			FEOF_CHECK;
@ -1301,7 +1478,8 @@ int main(int argc, char *argv[])
 				for(int i=0;i<fft_size-every_n_samples;i++) input[i]=input[i+every_n_samples];
 				fread(input+fft_size-every_n_samples, sizeof(complexf), every_n_samples, stdin);
 			}
-			apply_window_c(input,windowed,fft_size,window);
+			//apply_window_c(input,windowed,fft_size,window);
 			apply_precalculated_window_c(input,windowed,fft_size,windowt);
 			fft_execute(plan);
 			if(octave)
 			{
@ -1337,6 +1515,40 @@ int main(int argc, char *argv[])
 		}
 	}
 	// logaveragepower_cf <add_db> <fft_size> <avgnumber>
 	// For every output block, accumulate_power_cf() is run over <avgnumber> consecutive
 	// input blocks of <fft_size> complex samples read from stdin; the accumulated block
 	// is then passed through log_ff() and <fft_size> floats are written to stdout.
 	// add_db is reduced by 10*log10(avgnumber) up front, presumably so that the
 	// accumulated sum is reported as an average in dB -- TODO confirm against log_ff().
 	if(!strcmp(argv[1],"logaveragepower_cf"))
 	{
 		bigbufs=1;
 		if(argc<=4) return badsyntax("need required parameters (add_db, table_size, avgnumber)"); 
 		float add_db=0;
 		int avgnumber=0;
 		int fft_size=0;
 		//reject unparsable or non-positive values: a zero avgnumber would feed 0 to log10(),
 		//and a non-positive fft_size would request a zero-sized or wrapped-around allocation
 		if(sscanf(argv[2],"%g",&add_db)!=1) return badsyntax("add_db should be a number");
 		if(sscanf(argv[3],"%d",&fft_size)!=1 || fft_size<=0) return badsyntax("fft_size should be an integer > 0");
 		if(sscanf(argv[4],"%d",&avgnumber)!=1 || avgnumber<=0) return badsyntax("avgnumber should be an integer > 0");
 		//input holds fft_size complex samples (two floats each), output holds fft_size floats
 		float *input = malloc(sizeof(float)*2 * fft_size);
 		float *output = malloc(sizeof(float) * fft_size);
 		if(!input || !output)
 		{
 			fprintf(stderr,"logaveragepower_cf: could not allocate buffers\n");
 			return -2;
 		}
 		add_db -= 10.0*log10(avgnumber);
 		for(;;)
 		{
 			int i,n;
 			//start every averaging round from zero
 			for(i = 0; i < fft_size; i++) {
 				output[i] = 0;
 			}
 			//FEOF_CHECK and TRY_YIELD are macros defined outside this block
 			FEOF_CHECK;
 			for(n = 0; n < avgnumber; n++) {
 				fread (input, sizeof(float)*2, fft_size, stdin);
 				accumulate_power_cf((complexf*)input, output, fft_size);
 			}
 			log_ff(output, output, fft_size, add_db);
 			fwrite (output, sizeof(float), fft_size, stdout);
 			TRY_YIELD;
 		}
 		return 0;
 	}
 	if(!strcmp(argv[1],"fft_exchange_sides_ff"))
 	{
 		if(argc<=2) return badsyntax("need required parameters (fft_size)");
@ -1441,8 +1653,6 @@ int main(int argc, char *argv[])
 		float high_cut;
 		float transition_bw;
 		window_t window = WINDOW_DEFAULT;
 		char window_string[256]; //TODO: nice buffer overflow opportunity
 		int fd;
 		if(fd=init_fifo(argc,argv))
 		{
@ -1874,11 +2084,162 @@ int main(int argc, char *argv[])
 		}
 	}
 	// fastddc_fwd_cc <decimation> [transition_bw [window]]
 	// Reads complex samples from stdin, runs an overlapped FFT of ddc.fft_size points
 	// over them and writes each FFT output block to stdout. The sizes (fft_size,
 	// input_size, overlap_length) are taken from the fastddc_t filled in by fastddc_init().
 	if( !strcmp(argv[1],"fastddc_fwd_cc") ) //<decimation> [transition_bw [window]]
 	{	
 		int decimation;
 		if(argc<=2) return badsyntax("need required parameter (decimation)");
 		sscanf(argv[2],"%d",&decimation);
 		float transition_bw = 0.05;
 		if(argc>3) sscanf(argv[3],"%g",&transition_bw);
 		//NOTE(review): window is parsed and logged, but the only call that used it (apply_window_c below) is commented out
 		window_t window = WINDOW_DEFAULT;
 		if(argc>4)	window=firdes_get_window_from_string(argv[4]);
 		else fprintf(stderr,"fastddc_fwd_cc: window = %s\n",firdes_get_string_from_window(window));
 		fastddc_t ddc; 
 		//a nonzero return from fastddc_init() is treated as an error; the last argument (shift rate) is 0 here
 		if(fastddc_init(&ddc, transition_bw, decimation, 0)) { badsyntax("error in fastddc_init()"); return 1; }
 		fastddc_print(&ddc,"fastddc_fwd_cc");
 		if(!initialize_buffers()) return -2;
 		sendbufsize(ddc.fft_size);
 		//make FFT plan
 		complexf* input = 	 (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
 		complexf* windowed = (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
 		complexf* output =   (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
 		for(int i=0;i<ddc.fft_size;i++) iof(input,i)=qof(input,i)=0; //null the input buffer
 		//benchmark is hard-coded to 1, so both progress messages below are always printed
 		int benchmark = 1; 
 		if(benchmark) fprintf(stderr,"fastddc_fwd_cc: benchmarking FFT...");
 		//the plan transforms `windowed` into `output`
 		FFT_PLAN_T* plan=make_fft_c2c(ddc.fft_size, windowed, output, 1, benchmark);
 		if(benchmark) fprintf(stderr," done\n");
 		for(;;)
 		{
 			//FEOF_CHECK and TRY_YIELD are macros defined outside this block
 			FEOF_CHECK;
 			//overlapped FFT
 			//move overlap_length samples down by input_size to the front of the buffer,
 			//then read input_size fresh samples right after them
 			//(presumably fft_size == overlap_length + input_size -- TODO confirm against fastddc_init())
 			for(int i=0;i<ddc.overlap_length;i++) input[i]=input[i+ddc.input_size];
 			fread(input+ddc.overlap_length, sizeof(complexf), ddc.input_size, stdin);
 			//apply_window_c(input,windowed,ddc.fft_size,window);
 			memcpy(windowed, input, ddc.fft_size*sizeof(complexf)); //we can switch off windows; TODO: it is likely that we shouldn't apply a window to both the FFT and the filter.
 			fft_execute(plan);
 			fwrite(output, sizeof(complexf), ddc.fft_size, stdout);
 			TRY_YIELD;
 		}
 	}
 	// fastddc_inv_cc <shift_rate> <decimation> [transition_bw [window]]
 	// Reads blocks of ddc.fft_size complex values from stdin, passes each block with
 	// the FFT of a bandpass filter to fastddc_inv_cc(), and writes
 	// shift_stat.output_size complex samples per block to stdout.
 	// shift_rate comes from argv[2], or from a control fifo/fd when init_fifo()
 	// returns a nonzero descriptor; a new value on the control channel rebuilds the
 	// filter and the plans.
 	if( !strcmp(argv[1],"fastddc_inv_cc") ) //<shift_rate> <decimation> [transition_bw [window]]
 	{
 		float shift_rate;
 		//in control mode two arguments (the switch and its value) stand where the single
 		//rate argument would be, so the remaining positional parameters sit one index later
 		int plusarg=0;
 		int fd;
 		if(fd=init_fifo(argc,argv))
 		{
 			//poll every 10 ms until the first shift rate arrives
 			while(!read_fifo_ctl(fd,"%g\n",&shift_rate)) usleep(10000);
 			plusarg=1;
 		}
 		else
 		{
 			if(argc<=2) return badsyntax("need required parameter (rate)");
 			sscanf(argv[2],"%g",&shift_rate);
 		}
 		int decimation;
 		if(argc<=3+plusarg) return badsyntax("need required parameter (decimation)");
 		sscanf(argv[3+plusarg],"%d",&decimation);
 		float transition_bw = 0.05;
 		if(argc>4+plusarg) sscanf(argv[4+plusarg],"%g",&transition_bw);
 		window_t window = WINDOW_DEFAULT;
 		if(argc>5+plusarg) window=firdes_get_window_from_string(argv[5+plusarg]);
 		//the log prefix now names this command (it used to say fastddc_apply_cc)
 		else fprintf(stderr,"fastddc_inv_cc: window = %s\n",firdes_get_string_from_window(window));
 		//outer loop: one iteration per shift_rate value
 		for(;;)
 		{
 			fastddc_t ddc;
 			if(fastddc_init(&ddc, transition_bw, decimation, shift_rate)) { badsyntax("error in fastddc_init()"); return 1; }
 			fastddc_print(&ddc,"fastddc_inv_cc");
 			if(!initialize_buffers()) return -2;
 			sendbufsize(ddc.post_input_size/ddc.post_decimation); //TODO not exactly correct
 			//prepare making the filter and doing FFT on it
 			complexf* taps=(complexf*)calloc(ddc.fft_size,sizeof(complexf)); //initialize to zero
 			complexf* taps_fft=(complexf*)malloc(sizeof(complexf)*ddc.fft_size);
 			if(!taps || !taps_fft)
 			{
 				fprintf(stderr,"fastddc_inv_cc: could not allocate filter buffers\n");
 				free(taps);
 				free(taps_fft);
 				return -2;
 			}
 			FFT_PLAN_T* plan_taps = make_fft_c2c(ddc.fft_size, taps, taps_fft, 1, 0); //forward, don't benchmark (we need this only once)
 			//make the filter
 			float filter_half_bw = 0.5/decimation;
 			fprintf(stderr, "fastddc_inv_cc: preparing a bandpass filter of [%g, %g] cutoff rates. Real transition bandwidth is: %g\n", (-shift_rate)-filter_half_bw, (-shift_rate)+filter_half_bw, 4.0/ddc.taps_length);
 			firdes_bandpass_c(taps, ddc.taps_length, (-shift_rate)-filter_half_bw, (-shift_rate)+filter_half_bw, window);
 			fft_execute(plan_taps);
 			fft_swap_sides(taps_fft,ddc.fft_size);
 			//make FFT plan
 			complexf* inv_input = 	 (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_inv_size);
 			complexf* inv_output =   (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_inv_size);
 			fprintf(stderr,"fastddc_inv_cc: benchmarking FFT...");
 			FFT_PLAN_T* plan_inverse = make_fft_c2c(ddc.fft_inv_size, inv_input, inv_output, 0, 1); //inverse, do benchmark
 			fprintf(stderr," done\n");
 			//alloc. buffers
 			complexf* input = 	 (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
 			complexf* output =   (complexf*)fft_malloc(sizeof(complexf)*ddc.post_input_size);
 			decimating_shift_addition_status_t shift_stat;
 			bzero(&shift_stat, sizeof(shift_stat));
 			//inner loop: one iteration per input block
 			for(;;)
 			{
 				//FEOF_CHECK and TRY_YIELD are macros defined outside this block
 				FEOF_CHECK;
 				fread(input, sizeof(complexf), ddc.fft_size, stdin);
 				shift_stat = fastddc_inv_cc(input, output, &ddc, plan_inverse, taps_fft, shift_stat);
 				fwrite(output, sizeof(complexf), shift_stat.output_size, stdout);
 				TRY_YIELD;
 				//a new shift rate leaves this loop, so the outer loop rebuilds everything
 				if(read_fifo_ctl(fd,"%g\n",&shift_rate)) break;
 			}
 			//the filter buffers are reallocated on the next pass; plan_taps, the only
 			//other user of them, is never executed again, so release them here
 			free(taps);
 			free(taps_fft);
 			//NOTE(review): the fft_malloc() buffers and both FFT plans are still re-created on
 			//every retune without being released; their release functions are not visible here
 		}
 	}
 	// _fft2octave <fft_size>
 	// Turns blocks of <fft_size> complex values read from stdin into Octave commands
 	// on stdout: a one-time plot setup line, then per block an "fftdata=[...]"
 	// assignment followed by the statements that refresh the plot.
 	if(strcmp(argv[1], "_fft2octave") == 0)
 	{
 		int fft_size;
 		if(argc<=2) return badsyntax("need required parameter (fft_size)");
 		sscanf(argv[2],"%d",&fft_size);
 		complexf* spectrum=(complexf*)malloc(sizeof(complexf)*fft_size);
 		initialize_buffers();
 		if(!sendbufsize(fft_size)) return -2;
 		printf("setenv(\"GNUTERM\",\"X11 noraise\");y=zeros(1,%d);semilogy(y,\"ydatasource\",\"y\");\n",fft_size);
 		while(1)
 		{
 			FEOF_CHECK;
 			fread(spectrum, sizeof(complexf), fft_size, stdin);
 			printf("fftdata=[");
 			//the upper half of the block is printed before the lower half, which swaps
 			//the two parts of the array so the output is a valid spectrum
 			for(int pass=0; pass<2; pass++)
 			{
 				int first = pass ? 0 : fft_size/2;
 				int limit = pass ? fft_size/2 : fft_size;
 				for(int bin=first; bin<limit; bin++)
 				{
 					printf("(%g)+(%g)*i ",iof(spectrum,bin),qof(spectrum,bin));
 				}
 			}
 			printf("];\ny=abs(fftdata);\nrefreshdata;\n");
 		}
 	}
 	// "none" is accepted as a function name that does no processing: it returns 0 right away.
 	if(!strcmp(argv[1],"none"))
 	{
 		return 0;
 	}
-	return badsyntax("function name given in argument 1 does not exist. Possible causes:\n- You mistyped the commandline.\n- You need to update csdr to a newer version (if available).");
+	fprintf(stderr,"csdr: function name given in argument 1 (%s) does not exist. Possible causes:\n- You mistyped the commandline.\n- You need to update csdr to a newer version (if available).", argv[1]); return -1;
 }
--- a/ddcd.cpp
+++ b/ddcd.cpp
@ -0,0 +1,335 @@
 /*
 This software is part of libcsdr, a set of simple DSP routines for
 Software Defined Radio.
 Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the copyright holder nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 #include "ddcd.h"
 //TCP port to listen on (--port); 0 means "not given", which main() rejects
 int host_port = 0;
 //IPv4 address to bind to (--address)
 char host_address[100] = "127.0.0.1";
 //NOTE(review): not referenced anywhere in the visible code
 int thread_cntr = 0;
 //CLI parameters
 //decimation factor (--decimation); required, 1 means that the raw samples are just copied
 int decimation = 0;
 //transition bandwidth (--transition); main() accepts values between 0 and 0.5
 float transition_bw = 0.05;
 //NOTE(review): despite the remark below, main() passes bufsize to tsmpool and uses it as the read size
 int bufsize = 1024; //! currently unused
 //second argument of the tsmpool constructor (--bufcnt)
 int bufcnt = 1024;
 //name of the DDC method as given on the command line (--method): "td" or "fastddc"
 char ddc_method_str[100] = "td";
 //parsed form of ddc_method_str; main() leaves it untouched when decimation is 1
 ddc_method_t ddc_method;
 //Signal handler installed by main(): reports the caught signal on stderr and terminates the process with status 0.
 //Only async-signal-safe calls (write, _exit) are used: the stdio functions and exit() may deadlock or corrupt
 //their state when the signal arrives while the interrupted code is inside stdio or malloc.
 void sig_handler(int signo)
 {
 	static const char msg_head[] = MSG_START "signal ";
 	static const char msg_tail[] = " caught, exiting ddcd...\n";
 	//format signo as a decimal number by hand, because the printf family is not async-signal-safe
 	char digits[16];
 	int first = (int)sizeof(digits);
 	unsigned int magnitude = (signo<0) ? 0u-(unsigned int)signo : (unsigned int)signo;
 	do
 	{
 		digits[--first] = (char)('0' + magnitude%10);
 		magnitude /= 10;
 	} while(magnitude);
 	if(signo<0) digits[--first] = '-';
 	//best effort: if stderr cannot be written, we still exit below
 	(void)( write(STDERR_FILENO, msg_head, sizeof(msg_head)-1) != -1
 		&& write(STDERR_FILENO, digits+first, sizeof(digits)-first) != -1
 		&& write(STDERR_FILENO, msg_tail, sizeof(msg_tail)-1) != -1 );
 	_exit(0);
 }
 //Entry point of ddcd. Parses the command line, opens the listening TCP socket, creates the tsmpool and then
 //loops forever: every accepted connection gets its own client thread, and the input stream (stdin) is read
 //into the write buffers obtained from the pool. Terminates through print_exit()/error_exit() on invalid
 //arguments, on socket setup errors and at the end of the input stream.
 //NOTE(review): set_nonblocking(), tsmpool, client_thread() and clients_close_all_finished() are not declared in
 //the parts of ddcd.h/ddcd.cpp visible before this point -- confirm that they are declared before main().
 //NOTE(review): clients_close_all_finished() needs a file-scope `clients`, while the vector below is local -- confirm.
 int main(int argc, char* argv[])
 {
 	int c;
 	for(;;)
 	{
 		int option_index = 0;
 		static struct option long_options[] = {
 			{"port",       required_argument, 0,  'p' },
 			{"address",    required_argument, 0,  'a' },
 			{"decimation", required_argument, 0,  'd' },
 			{"bufsize",    required_argument, 0,  'b' },
 			{"bufcnt",     required_argument, 0,  'n' },
 			{"method",     required_argument, 0,  'm' },
 			{"transition", required_argument, 0,  't' },
 			{0,            0,                 0,   0  } //getopt_long() finds the end of the array by this all-zero entry
 		};
 		c = getopt_long(argc, argv, "p:a:d:b:n:m:t:", long_options, &option_index);
 		if(c==-1) break;
 		switch (c)
 		{
 		case 'a':
 			host_address[100-1]=0;
 			strncpy(host_address,optarg,100-1);
 			break;
 		case 'p':
 			host_port=atoi(optarg);
 			break;
 		case 'd':
 			decimation=atoi(optarg);
 			break;
 		case 'b':
 			bufsize=atoi(optarg);
 			break;
 		case 'n':
 			bufcnt=atoi(optarg);
 			break;
 		case 'm':
 			ddc_method_str[100-1]=0;
 			strncpy(ddc_method_str,optarg,100-1);
 			break;
 		case 't':
 			sscanf(optarg,"%g",&transition_bw);
 			break;
 		case 0:
 		case '?':
 		case ':':
 		default:;
 			print_exit(MSG_START "error in getopt_long()\n");
 		}
 	}
 	if(!decimation) print_exit(MSG_START "missing required command line argument, --decimation.\n");
 	if(!host_port) print_exit(MSG_START "missing required command line argument, --port.\n");
 	if(decimation<0) print_exit(MSG_START "invalid value for --decimation (should be >0).\n");
 	if(decimation==1) fprintf(stderr, MSG_START "decimation = 1, just copying raw samples.\n");
 	if(transition_bw<0||transition_bw>0.5) print_exit(MSG_START "invalid value for --transition (should be between 0 and 0.5).\n");
 	//zero is rejected as well, as the messages say: a zero-sized read() below would look like the end of the input
 	if(bufsize<=0) print_exit(MSG_START "invalid value for --bufsize (should be >0)\n");
 	if(bufcnt<=0) print_exit(MSG_START "invalid value for --bufcnt (should be >0)\n");
 	if(decimation==1); //don't do anything then //!will have to take care about this later
 	else if(!strcmp(ddc_method_str,"td"))
 	{
 		ddc_method = M_TD;
 		fprintf(stderr, MSG_START "method is M_TD (default).\n");
 	}
 	else if (!strcmp(ddc_method_str,"fastddc"))
 	{
 		ddc_method = M_FASTDDC;
 		fprintf(stderr, MSG_START "method is M_FASTDDC.\n");
 	}
 	else print_exit(MSG_START "invalid parameter given to --method.\n");
 	//set signals (SIGKILL is not in the list: it cannot be caught, sigaction() just fails for it)
 	struct sigaction sa;
 	memset(&sa, 0, sizeof(sa));
 	sa.sa_handler = sig_handler;
 	sigaction(SIGTERM, &sa, NULL);
 	sigaction(SIGQUIT, &sa, NULL);
 	sigaction(SIGINT, &sa, NULL);
 	sigaction(SIGHUP, &sa, NULL);
 	struct sockaddr_in addr_host;
 	int listen_socket;
 	std::vector<client_t*> clients;
 	clients.reserve(100);
 	listen_socket=socket(AF_INET,SOCK_STREAM,0);
 	if(listen_socket == -1) error_exit(MSG_START "cannot create socket");
 	int sockopt = 1;
 	if( setsockopt(listen_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&sockopt, sizeof(sockopt)) == -1 )
 		error_exit(MSG_START "cannot set SO_REUSEADDR");  //the best description on SO_REUSEADDR ever: http://stackoverflow.com/a/14388707/3182453
 	memset(&addr_host,0,sizeof(addr_host)); //zero bytes, not the character '0'
 	addr_host.sin_family = AF_INET;
 	addr_host.sin_port = htons(host_port);
 	if( (addr_host.sin_addr.s_addr=inet_addr(host_address)) == INADDR_NONE )
 		error_exit(MSG_START "invalid host address");
 	if( bind(listen_socket, (struct sockaddr*) &addr_host, sizeof(addr_host)) < 0 )
 		error_exit(MSG_START "cannot bind() address to the socket");
 	if( listen(listen_socket, 10) == -1 )
 		error_exit(MSG_START "cannot listen() on socket");
 	fprintf(stderr,MSG_START "listening on %s:%d\n", inet_ntoa(addr_host.sin_addr), host_port);
 	struct sockaddr_in addr_cli;
 	socklen_t addr_cli_len;
 	int new_socket;
 	int highfd = 0;
 	fd_set select_fds;
 	const int input_fd = STDIN_FILENO; //the input stream is read from stdin
 	maxfd(&highfd, listen_socket);
 	maxfd(&highfd, input_fd);
 	//Set stdin and listen_socket to non-blocking
 	if(set_nonblocking(input_fd) || set_nonblocking(listen_socket))
 		error_exit(MSG_START "cannot set_nonblocking()");
 	//Create tsmpool
 	tsmpool* pool = new tsmpool(bufsize, bufcnt);
 	if(!pool->ok) print_exit(MSG_START "tsmpool failed to initialize\n");
 	unsigned char* current_write_buffer = pool->get_write_buffer();
 	int index_in_current_write_buffer = 0;
 	for(;;)
 	{
 		//Let's wait until there is any new data to read, or any new connection!
 		//select() overwrites the set with the descriptors that are ready, so it is rebuilt for every call
 		FD_ZERO(&select_fds);
 		FD_SET(listen_socket, &select_fds);
 		FD_SET(input_fd, &select_fds);
 		select(highfd, &select_fds, NULL, NULL, NULL);
 		//Is there a new client connection?
 		addr_cli_len = sizeof(addr_cli); //accept() updates the length, so it has to be reset before each call
 		if( (new_socket = accept(listen_socket, (struct sockaddr*)&addr_cli, &addr_cli_len)) != -1)
 		{
 			clients_close_all_finished();
 			//the client record has to be complete before the thread starts to use it
 			client_t* new_client = new client_t;
 			new_client->error = 0;
 			memcpy(&new_client->addr, &addr_cli, sizeof(new_client->addr));
 			new_client->socket = new_socket;
 			new_client->status = CS_CREATED;
 			//pthread_create() returns 0 on success and a positive error number on failure;
 			//client_thread() casts its parameter to client_t*, so the pointer itself is passed
 			if(pthread_create(&new_client->thread, NULL, client_thread, (void*)new_client) == 0)
 			{
 				clients.push_back(new_client);
 				fprintf(stderr, MSG_START "pthread_create() done, clients now: %d\n", (int)clients.size());
 			}
 			else
 			{
 				fprintf(stderr, MSG_START "pthread_create() failed.\n");
 				close(new_socket);
 				delete new_client;
 			}
 		}
 		if(index_in_current_write_buffer >= bufsize)
 		{
 			current_write_buffer = pool->get_write_buffer();
 			index_in_current_write_buffer = 0;
 		}
 		int retval = read(input_fd, current_write_buffer + index_in_current_write_buffer, bufsize - index_in_current_write_buffer);
 		if(retval>0)
 		{
 			index_in_current_write_buffer += retval;
 		}
 		else if(retval==0)
 		{
 			//!end of input stream, close clients and exit
 			print_exit(MSG_START "end of input, exiting.\n");
 		}
 	}
 }
 //Disabled fragment: the loop that writes the freshly read buffer to the pipe of every client and removes the
 //clients whose process has exited. It uses client_t members (pipefd, pid) that the client_t in ddcd.h no longer
 //has, and its braces are not balanced, so it cannot be re-enabled as it is.
 #if 0
 for (int i=0; i<clients.size(); i++)
 {
 	if(write(clients[i]->pipefd[1], buf, retval)==-1)
 	{
 		if(!clients[i]->error)
 		{
 			print_client(clients[i], "lost buffer, failed to write pipe.");
 			clients[i]->error=1;
 		}
 		//fprintf(stderr, MSG_START "errno is %d\n", errno); //usually 11
 		//int wpstatus;
 		//int wpresult = waitpid(clients[i]->pid, &wpstatus, WNOHANG);
 		//fprintf(stderr, MSG_START "pid is %d\n",clients[i]->pid);
 		//perror("somethings wrong");
 		//if(wpresult == -1) print_client(clients[i], "error while waitpid()!");
 		//else if(wpresult == 0)
 		waitpid(clients[i]->pid, NULL, WNOHANG);
 		if(!proc_exists(clients[i]->pid))
 		{
 			//Client exited!
 			print_client(clients[i], "closing client from main process.");
 			close(clients[i]->pipefd[1]);
 			close(clients[i]->socket);
 			delete clients[i];
 			clients.erase(clients.begin()+i);
 			fprintf(stderr, MSG_START "done closing client from main process.\n");
 		}
 	}
 	else  { if(clients[i]->error) print_client(clients[i], "pipe okay again."); clients[i]->error=0; }
 }
 }
 //TODO: at the end, server closes pipefd[1] for client
 #endif
 //Removes every client whose thread has reported CS_THREAD_FINISHED from the client list, reclaims the thread
 //and frees the client record.
 //NOTE(review): `clients` has to be visible at file scope for this function; in the code visible here it is only
 //declared inside main() -- confirm.
 //NOTE(review): the client socket is not closed here; confirm which side is responsible for closing it.
 void clients_close_all_finished()
 {
 	size_t i = 0;
 	while(i < clients.size())
 	{
 		if(clients[i]->status != CS_THREAD_FINISHED)
 		{
 			i++;
 			continue;
 		}
 		//the thread sets this status as its last step, so joining it returns promptly and makes the delete safe
 		pthread_join(clients[i]->thread, NULL);
 		delete clients[i];
 		//erase() takes an iterator; the next element moves into slot i, so i is not advanced after an erase
 		clients.erase(clients.begin()+i);
 	}
 }
 //Unfinished parser for the control commands of a client (see the TODO marker).
 //NOTE(review): does not compile as written -- the `if(newline_index == -1)` has no statement of its own, the last
 //for(...) is cut off in the middle, `commands` is not declared in the visible code, and c_str() returns a
 //const char*. The parameter `c` and the local `cmd` are not used yet. client_thread() tests the result of this
 //function although it is declared void -- confirm the intended return type.
 void client_parser_push(char c)
 { //!TODO
 	command_t cmd;
 	char* commands_cstr = commands.c_str();
 	//find the last newline in the collected text (-1 if there is none)
 	int newline_index = -1;
 	for(int i=0;commands_cstr[i];i++) if(commands_cstr[i]=='\n') newline_index = i;
 	if(newline_index == -1)
 	char param_name[101];
 	char param_value[101];
 	for(int i=0;i<100;commands_csdr
 }
 //Thread entry point for one client; param is cast back to the client_t* of that client.
 //Unfinished (see the TODO marker): the processing steps listed in the loop are only placeholders.
 //NOTE(review): does not compile as written -- the tsmpool constructor call lacks its second argument and the
 //terminating semicolon, and client_parser_push() is declared void although its result is tested below.
 void* client_thread (void* param) //!TODO
 {
 	client_t* me_the_client = (client_t*)param;
 	me_the_client->status = CS_THREAD_RUNNING;
 	char ctl_data_buffer; //one byte of control data received from the socket
 	int retval;
 	tsmpool* p1_temp;
 	tsmpool* p2_temp; //NOTE(review): never assigned in the visible code
 	const int num_client_buffers = 20; //NOTE(review): unused; presumably the missing tsmpool argument below -- confirm
 	if(ddc_method == M_TD)
 	{
 		p1_temp = new tsmpool(bufsize, )
 	}
 	for(;;)
 	{
 		//feed the control bytes to the parser one by one
 		//NOTE(review): recv() returns -1 on error, which keeps this loop running -- confirm the intended exit condition
 		do
 		{
 			retval = recv(me_the_client->socket, &ctl_data_buffer, 1, 0);
 			if(client_parser_push(ctl_data_buffer)) break;
 		} while (retval);
 		//read control data from socket
 		//process control data
 		//run shift
 		//run decimation
 		//have an exit condition (??)
 		if(ddc_method == M_TD)
 		{
 		}
 	}
 	//NOTE(review): not reachable at the moment, as the for(;;) above has no exit
 	me_the_client->status = CS_THREAD_FINISHED;
 	pthread_exit(NULL);
 	return NULL;
 }
 //Reports a failed library/system call and terminates the process with exit status 1.
 //perror() prints `why`, then a colon and the description of the current errno, then a newline on its own,
 //so `why` does not need a "\n" at its end.
 void error_exit(const char* why)
 {
 	const int failure_status = 1;
 	perror(why);
 	exit(failure_status);
 }
 //Writes `why` to stderr exactly as given (nothing is appended) and terminates the process with exit status 1.
 void print_exit(const char* why)
 {
 	const int failure_status = 1;
 	fprintf(stderr, "%s", why);
 	exit(failure_status);
 }
 //Keeps *maxfd one above the highest descriptor seen so far: when fd is not below *maxfd, *maxfd becomes fd+1.
 void maxfd(int* maxfd, int fd)
 {
 	const int one_past_fd = fd+1;
 	if(*maxfd < one_past_fd)
 	{
 		*maxfd = one_past_fd;
 	}
 }
--- a/ddcd.h
+++ b/ddcd.h
@ -0,0 +1,57 @@
 #pragma once
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <getopt.h>
 #include <signal.h>
 #include <unistd.h>
 #include <pthread.h>
 #include <arpa/inet.h>
 #include <sys/select.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <vector>
 #include <limits.h>
 //Program name, and the "ddcd: " prefix that is prepended (by string literal concatenation) to the log messages
 #define SOFTWARE_NAME "ddcd"
 #define MSG_START SOFTWARE_NAME ": "
 //The DDC method selected with --method
 typedef enum ddc_method_e
 {
 	M_TD,      //selected by "td" (the default)
 	M_FASTDDC  //selected by "fastddc"
 } ddc_method_t;
 //Life cycle of a client record
 typedef enum client_status_e
 {
 	CS_CREATED,         //set by main() when the record is filled in
 	CS_THREAD_RUNNING,  //set by client_thread() when it starts
 	CS_THREAD_FINISHED  //set at the end of client_thread(); clients_close_all_finished() looks for this value
 } client_status_t;
 //Everything the server keeps about one connected client
 typedef struct client_s
 {
 	struct sockaddr_in addr; //peer address, copied from the address filled in by accept()
 	int socket; //the connected socket returned by accept()
 	int error; //set to non-zero on error (data transfer failed)
 	pthread_t thread; //the thread started for this client with pthread_create()
 	client_status_t status;
 } client_t;
 //Kind of a control command
 //NOTE(review): presumably these correspond to the "shift" and "bypass" commands of the control channel -- confirm
 typedef enum command_type_e
 {
 	CT_SHIFT,
 	CT_BYPASS
 } command_type_t;
 //One control command: its kind and a float parameter
 typedef struct command_s
 {
 	command_type_t type;
 	float float_param; //NOTE(review): presumably the value given with the command (e.g. the shift rate) -- confirm
 } command_t;
 //Prints `why` to stderr and terminates the process with exit status 1.
 void print_exit(const char* why);
 //Reports `why` through perror() and terminates the process with exit status 1.
 void error_exit(const char* why);
 //Raises *maxfd to fd+1 when fd is not below it.
 void maxfd(int* maxfd, int fd);
--- a/ddcd_old.cpp
+++ b/ddcd_old.cpp
@ -0,0 +1,560 @@
 /*
 This software is part of libcsdr, a set of simple DSP routines for 
 Software Defined Radio.
 Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the copyright holder nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 #include "ddcd.h"
 #define SOFTWARE_NAME "ddcd"
 #define MSG_START SOFTWARE_NAME ": "
 //TCP port to listen on (--port); 0 means "not given", which main() rejects
 int host_port = 0;
 //IPv4 address to bind to (--address)
 char host_address[100] = "127.0.0.1";
 //decimation factor (--decimation); required, 1 means that the raw samples are just copied
 int decimation = 0;
 //transition bandwidth (--transition); main() accepts values between 0 and 0.5
 float transition_bw = 0.05;
 //size of the I/O buffer (--bufsize)
 int bufsize = 1024;
 //bufsize*sizeof(char), computed in main(); the allocated size of buf and the size of every read() into it
 int bufsizeall;
 //value read from /proc/sys/fs/pipe-max-size in main(); applied to the client pipes with F_SETPIPE_SZ
 int pipe_max_size;
 //set to 1 by client(); sig_handler() uses it to decide which subprocess group to terminate
 int in_client = 0;
 //name of the DDC method as given on the command line (--method): "td" or "fastddc"
 char ddc_method_str[100] = "td";
 //parsed form of ddc_method_str; main() leaves it untouched when decimation is 1
 ddc_method_t ddc_method;
 //NOTE(review): not referenced anywhere in the visible code
 pid_t main_dsp_proc;
 int input_fd = STDIN_FILENO; //can be stdin, or the stdout of main_subprocess
 //pid and process group of the subprocess started by main() (0 while there is none)
 pid_t main_subprocess_pid = 0;
 pid_t main_subprocess_pgrp = 0;
 //pid and process group of the subprocess started by client() (0 while there is none)
 pid_t client_subprocess_pid = 0;
 pid_t client_subprocess_pgrp = 0;
 //I/O buffer of bufsizeall bytes, allocated in main()
 char* buf;
 //Switches fd to non-blocking mode by adding O_NONBLOCK to its file status flags.
 //Returns 0 on success, and 1 when reading or writing the flags with fcntl() fails.
 int set_nonblocking(int fd)
 {
 	const int current_flags = fcntl(fd, F_GETFL);
 	if(current_flags == -1) return 1;
 	if(fcntl(fd, F_SETFL, current_flags|O_NONBLOCK) == -1) return 1;
 	return 0;
 }
 //Tells whether a process with the given pid exists: returns 1 if it does, 0 if the probe fails.
 //The pids 0 and 1 are never probed, they are always reported as existing.
 int proc_exists(pid_t pid)
 {
 	const int is_special_pid = (pid==0) || (pid==1);
 	if(is_special_pid) return 1;
 	//signal number 0 delivers nothing, only the checks of kill() are carried out
 	const int probe_result = kill(pid, 0);
 	return (probe_result == -1) ? 0 : 1;
 }
 //Common handler of all the signals registered in main().
 // - SIGPIPE is only logged.
 // - SIGCHLD is ignored unless it means that the main subprocess is gone.
 // - In every other case the subprocess group that belongs to this process (main or client) is terminated
 //   with killpg2(), the signal is logged and the process exits with status 0.
 //NOTE(review): fprintf(), fflush() and exit() are not async-signal-safe -- consider write()/_exit().
 void sig_handler(int signo)
 {
 	if(signo==SIGPIPE)
 	{
 		fprintf(stderr,MSG_START "SIGPIPE received.\n");
 		return;
 	}
 	if(signo==SIGCHLD)
 	{
 		//some child changed its state: carry on only if it was the main subprocess that went away
 		if(!main_subprocess_pid) return;
 		int reaped_status;
 		waitpid(main_subprocess_pid, &reaped_status, WNOHANG); //collect it without blocking, before probing it
 		if(proc_exists(main_subprocess_pid)) return;
 		fprintf(stderr,MSG_START "main_subprocess_pid exited! Exiting...\n");
 	}
 	//if(pgrp!=1 && pgrp!=0) //I just want to make sure that we cannot kill init or sched
 	//	killpg(pgrp, signo);
 	if(in_client)
 	{
 		if(client_subprocess_pid) killpg2(client_subprocess_pgrp);
 	}
 	else
 	{
 		if(main_subprocess_pid) killpg2(main_subprocess_pgrp);
 	}
 	const char* where = (in_client) ? "client" : "main";
 	fprintf(stderr, MSG_START "signal %d caught in %s, exiting ddcd...\n", signo, where);
 	fflush(stderr);
 	exit(0);
 }
 //The client record created by main() for the most recently accepted connection; the forked client process
 //works with it in client().
 client_t* this_client;
 //Entry point of the fork()-based ddcd. Parses the command line, opens the listening TCP socket, optionally
 //starts the main DSP subprocess (fastddc method), then loops forever: every accepted connection gets its own
 //forked client process fed through a pipe, and every buffer read from the input is written to the pipes of
 //all the clients. Clients whose process has exited are removed from the list.
 //Terminates through print_exit()/error_exit() on invalid arguments and on setup errors.
 int main(int argc, char* argv[])
 {
 	int c;
 	fd_set select_fds;
 	for(;;)
 	{
 		int option_index = 0;
 		static struct option long_options[] = {
 			{"port",       required_argument, 0,  'p' },
 			{"address",    required_argument, 0,  'a' },
 			{"decimation", required_argument, 0,  'd' },
 			{"bufsize",    required_argument, 0,  'b' },
 			{"method",     required_argument, 0,  'm' },
 			{"transition", required_argument, 0,  't' },
 			{0,            0,                 0,   0  } //getopt_long() finds the end of the array by this all-zero entry
 		};
 		c = getopt_long(argc, argv, "p:a:d:b:m:t:", long_options, &option_index);
 		if(c==-1) break;
 		switch (c)
 		{
 		case 'a':
 			host_address[100-1]=0;
 			strncpy(host_address,optarg,100-1);
 			break;
 		case 'p':
 			host_port=atoi(optarg);
 			break;
 		case 'd':
 			decimation=atoi(optarg);
 			break;
 		case 'b':
 			bufsize=atoi(optarg);
 			break;
 		case 'm':
 			ddc_method_str[100-1]=0;
 			strncpy(ddc_method_str,optarg,100-1);
 			break;
 		case 't':
 			sscanf(optarg,"%g",&transition_bw);
 			break;
 		case 0:
 		case '?':
 		case ':':
 		default:;
 			print_exit(MSG_START "error in getopt_long()\n");
 		}
 	}
 	if(!decimation) print_exit(MSG_START "missing required command line argument, --decimation.\n");
 	if(!host_port) print_exit(MSG_START "missing required command line argument, --port.\n");
 	if(decimation<0) print_exit(MSG_START "invalid value for --decimation (should be >0).\n");
 	if(decimation==1) fprintf(stderr, MSG_START "decimation = 1, just copying raw samples.\n");
 	if(transition_bw<0||transition_bw>0.5) print_exit(MSG_START "invalid value for --transition (should be between 0 and 0.5).\n");
 	//the buffer is allocated with this size below, so it has to be positive
 	if(bufsize<=0) print_exit(MSG_START "invalid value for --bufsize (should be >0)\n");
 	if(decimation==1); //don't do anything then
 	else if(!strcmp(ddc_method_str,"td"))
 	{
 		ddc_method = M_TD;
 		fprintf(stderr, MSG_START "method is M_TD (default).\n");
 	}
 	else if (!strcmp(ddc_method_str,"fastddc"))
 	{
 		ddc_method = M_FASTDDC;
 		fprintf(stderr, MSG_START "method is M_FASTDDC.\n");
 	}
 	else print_exit(MSG_START "invalid parameter given to --method.\n");
 	//set signals (SIGKILL is not in the list: it cannot be caught, sigaction() just fails for it)
 	struct sigaction sa;
 	memset(&sa, 0, sizeof(sa));
 	sa.sa_handler = sig_handler;
 	sigaction(SIGTERM, &sa, NULL);
 	sigaction(SIGQUIT, &sa, NULL);
 	sigaction(SIGINT, &sa, NULL);
 	sigaction(SIGHUP, &sa, NULL);
 	sigaction(SIGCHLD, &sa, NULL);
 	sigaction(SIGPIPE, &sa, NULL);
 	prctl(PR_SET_PDEATHSIG, SIGHUP); //get a signal when parent exits
 	struct sockaddr_in addr_host;
 	int listen_socket;
 	std::vector<client_t*> clients;
 	clients.reserve(100);
 	listen_socket=socket(AF_INET,SOCK_STREAM,0);
 	if(listen_socket == -1) error_exit(MSG_START "cannot create socket");
 	int sockopt = 1;
 	if( setsockopt(listen_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&sockopt, sizeof(sockopt)) == -1 )
 		error_exit(MSG_START "cannot set SO_REUSEADDR");  //the best description on SO_REUSEADDR ever: http://stackoverflow.com/a/14388707/3182453
 	memset(&addr_host,0,sizeof(addr_host)); //zero bytes, not the character '0'
 	addr_host.sin_family=AF_INET;
 	addr_host.sin_port=htons(host_port);
 	if( (addr_host.sin_addr.s_addr=inet_addr(host_address)) == INADDR_NONE )
 		error_exit(MSG_START "invalid host address");
 	if( bind(listen_socket, (struct sockaddr*) &addr_host, sizeof(addr_host)) < 0 )
 		error_exit(MSG_START "cannot bind() address to the socket");
 	if( listen(listen_socket, 10) == -1 )
 		error_exit(MSG_START "cannot listen() on socket");
 	fprintf(stderr,MSG_START "listening on %s:%d\n", inet_ntoa(addr_host.sin_addr), host_port);
 	struct sockaddr_in addr_cli;
 	socklen_t addr_cli_len;
 	int new_socket;
 	bufsizeall = bufsize*sizeof(char);
 	buf = (char*)malloc(bufsizeall);
 	if(!buf) print_exit(MSG_START "cannot allocate the buffer\n");
 	FILE* tempfile = fopen("/proc/sys/fs/pipe-max-size","r");
 	if(!tempfile)
 	{
 		perror(MSG_START "cannot read /proc/sys/fs/pipe-max-size");
 	}
 	else
 	{
 		char pipe_max_size_str[100];
 		//one byte is kept for the terminating NUL: after reading 100 bytes it would land past the end of the array
 		int tfread = fread(pipe_max_size_str, 1, sizeof(pipe_max_size_str)-1, tempfile);
 		pipe_max_size_str[tfread]='\0';
 		pipe_max_size = atoi(pipe_max_size_str);
 		fclose(tempfile);
 		//fprintf(stderr, MSG_START "note: pipe_max_size = %d\n", pipe_max_size);
 		//if(pipe_max_size>4096 && fcntl(STDIN_FILENO, F_SETPIPE_SZ, pipe_max_size)==-1)
 		//	perror("failed to fcntl(STDIN_FILENO, F_SETPIPE_SZ, ...)");
 	}
 	//We'll see if it is a good idea:
 	//setpgrp();
 	//pgrp = getpgrp();
 	//It is not, because we can't catch Ctrl+C (SIGINT), as it is sent to a process group...
 	//Start DSP subprocess from the main process if required
 	char main_subprocess_cmd_buf[500];
 	int pipe_m2s_ctl[2];	//main to subprocess :: control channel
 	int pipe_s2m[2];		//subprocess to main
 	if(pipe(pipe_m2s_ctl)) error_exit(MSG_START "couldn't create pipe_m2s_ctl");
 	if(pipe(pipe_s2m)) error_exit(MSG_START "couldn't create pipe_s2m");
 	if(decimation!=1)
 	{
 		switch(ddc_method)
 		{
 		case M_TD:
 			break;
 		case M_FASTDDC:
 			snprintf(main_subprocess_cmd_buf, sizeof(main_subprocess_cmd_buf), subprocess_args_fastddc_1, decimation, transition_bw);
 			fprintf(stderr, MSG_START "starting main_subprocess_cmd: %s\n", main_subprocess_cmd_buf);
 			if(!(main_subprocess_pid = run_subprocess( main_subprocess_cmd_buf, 0, pipe_s2m, &main_subprocess_pgrp )))
 				print_exit(MSG_START "couldn't start main_subprocess_cmd!\n");
 			close(STDIN_FILENO); // redirect stdin to the stdin of the subprocess
 			break;
 		}
 	}
 	int highfd = 0;
 	maxfd(&highfd, listen_socket);
 	if(main_subprocess_pid) input_fd = pipe_s2m[0]; //else STDIN_FILENO
 	maxfd(&highfd, input_fd);
 	//Set stdin and listen_socket to non-blocking
 	if(set_nonblocking(input_fd) || set_nonblocking(listen_socket)) //don't do it before subprocess fork!
 		error_exit(MSG_START "cannot set_nonblocking()");
 	for(;;)
 	{
 		//Let's wait until there is any new data to read, or any new connection!
 		//select() overwrites the set with the descriptors that are ready, so it is rebuilt for every call
 		FD_ZERO(&select_fds);
 		FD_SET(listen_socket, &select_fds);
 		FD_SET(input_fd, &select_fds);
 		select(highfd, &select_fds, NULL, NULL, NULL);
 		//Is there a new client connection?
 		addr_cli_len = sizeof(addr_cli); //accept() updates the length, so it has to be reset before each call
 		if( (new_socket = accept(listen_socket, (struct sockaddr*)&addr_cli, &addr_cli_len)) != -1)
 		{
 			this_client = new client_t;
 			this_client->error = 0;
 			memcpy(&this_client->addr, &addr_cli, sizeof(this_client->addr));
 			this_client->socket = new_socket;
 			if(pipe(this_client->pipefd) == -1)
 			{
 				perror(MSG_START "cannot open new pipe() for the client");
 				//give up on this connection without leaking its socket and record
 				close(new_socket);
 				delete this_client;
 				this_client = NULL;
 				continue;
 			}
 			if(fcntl(this_client->pipefd[1], F_SETPIPE_SZ, pipe_max_size) == -1)
 				perror("failed to F_SETPIPE_SZ for the client pipe");
 			this_client->pid = fork();
 			if(this_client->pid == -1)
 			{
 				//fork() failed: there is no client process, so nothing may be added to the list
 				perror(MSG_START "cannot fork() for the client");
 				close(this_client->pipefd[0]);
 				close(this_client->pipefd[1]);
 				close(new_socket);
 				delete this_client;
 				this_client = NULL;
 				continue;
 			}
 			if(this_client->pid != 0)
 			{
 				//We're the parent
 				set_nonblocking(this_client->pipefd[1]);
 				clients.push_back(this_client);
 				fprintf(stderr, MSG_START "client pid: %d\n", this_client->pid);
 			}
 			else
 			{
 				//We're the client
 				client();
 				return 1;
 			}
 		}
 		int retval = read(input_fd, buf, bufsizeall);
 		if(retval==0)
 		{
 			//end of input stream, close clients and exit
 		}
 		else if(retval != -1)
 		{
 			for (int i=0; i<(int)clients.size(); i++)
 			{
 				if(write(clients[i]->pipefd[1], buf, retval)==-1)
 				{
 					if(!clients[i]->error)
 					{
 						print_client(clients[i], "lost buffer, failed to write pipe.");
 						clients[i]->error=1;
 					}
 					//fprintf(stderr, MSG_START "errno is %d\n", errno); //usually 11
 					waitpid(clients[i]->pid, NULL, WNOHANG);
 					if(!proc_exists(clients[i]->pid))
 					{
 						//Client exited!
 						print_client(clients[i], "closing client from main process.");
 						close(clients[i]->pipefd[1]);
 						close(clients[i]->socket);
 						delete clients[i];
 						clients.erase(clients.begin()+i);
 						i--; //the next client has moved into slot i, it must not be skipped
 						fprintf(stderr, MSG_START "done closing client from main process.\n");
 					}
 				}
 				else  { if(clients[i]->error) print_client(clients[i], "pipe okay again."); clients[i]->error=0; }
 			}
 		}
 		//TODO: at the end, server closes pipefd[1] for client
 	}
 	return 0;
 }
 //Starts `cmd` with "bash -c" in a forked child that is moved into its own process group.
 //  pipe_in:  if not NULL, the read end pipe_in[0] becomes the stdin of the child
 //  pipe_out: if not NULL, the write end pipe_out[1] becomes the stdout of the child
 //  pgrp:     receives the process group id of the child
 //Returns the pid of the child, or 0 if fork() failed. The parent returns only after the child has reported
 //through a private pipe that its process group is set up.
 pid_t run_subprocess(char* cmd, int* pipe_in, int* pipe_out, pid_t* pgrp)
 {
 	int syncpipe[2];
 	if(pipe(syncpipe)==-1) error_exit("failed to create pipe()");
 	pid_t pid = fork();
 	if(pid < 0)
 	{
 		//fork failed
 		close(syncpipe[0]);
 		close(syncpipe[1]);
 		return 0;
 	}
 	if(pid == 0)
 	{
 		//We're the subprocess
 		close(syncpipe[0]);
 		setpgrp();
 		write(syncpipe[1], " ", 1); //tell the parent that the process group exists now
 		close(syncpipe[1]); //it must not be inherited by the command
 		if(pipe_in)
 		{
 			close(pipe_in[1]);
 			dup2(pipe_in[0], STDIN_FILENO);
 		}
 		if(pipe_out)
 		{
 			close(pipe_out[0]);
 			dup2(pipe_out[1], STDOUT_FILENO);
 		}
 		execl("/bin/bash","bash","-c",cmd, (char*)0);
 		error_exit(MSG_START "run_subprocess failed to execute command");
 		return 0; //not reached, error_exit() terminates the process
 	}
 	else
 	{
 		//We're the parent
 		//the write end is closed first: if the parent kept it open, the read() below could never see the
 		//end of file when the child dies before writing, and would block forever
 		close(syncpipe[1]);
 		char synctemp;
 		read(syncpipe[0], &synctemp, 1);
 		close(syncpipe[0]);
 		*pgrp = getpgid(pid);
 		fprintf(stderr, MSG_START "run_subprocess pgid returned = %d\n", *pgrp);
 		return pid;
 	}
 }
 //Writes one log line about `client` to stderr: the ddcd prefix, the IPv4 address and TCP port of the peer,
 //then the text in `what`.
 void print_client(client_t* client, const char* what)
 {
 	//sin_port is stored in network byte order, it has to be converted before it is printed as a number
 	fprintf(stderr,MSG_START "(client %s:%d) %s\n", inet_ntoa(client->addr.sin_addr), ntohs(client->addr.sin_port), what);
 }
 #define CTL_BUFSIZE 1024
 //Collects the control data arriving on the socket `fd` without blocking, and hands out complete lines.
 //  output:   receives the line without its '\n', always NUL-terminated (cut to max_size-1 characters)
 //  max_size: size of the output buffer in bytes
 //Returns 1 when a complete line was copied to output, otherwise 0 (nothing received, or no newline yet).
 //If several complete lines are buffered at once, only the last one is returned, the earlier ones are dropped.
 //The partial data is kept in a static buffer between the calls, so this works for one socket per process.
 int read_socket_ctl(int fd, char* output, int max_size)
 {
 	static char buffer[CTL_BUFSIZE];
 	static int buffer_index=0;
 	if(buffer_index==CTL_BUFSIZE) buffer_index=0; //the buffer is full without a complete line: start over
 	int bytes_read=recv(fd,buffer+buffer_index,(CTL_BUFSIZE-buffer_index)*sizeof(char), MSG_DONTWAIT);
 	if(bytes_read<=0) return 0;
 	const int buffered=buffer_index+bytes_read; //number of valid bytes in the buffer now
 	//find the start of the last complete line (prev_newline_at) and the position after it (last_newline_at)
 	int prev_newline_at=0;
 	int last_newline_at=0;
 	for(int i=0;i<buffered;i++)
 	{
 		if(buffer[i]=='\n')
 		{
 			prev_newline_at=last_newline_at;
 			last_newline_at=i+1;
 		}
 	}
 	if(!last_newline_at)
 	{
 		//no complete line yet, keep collecting
 		buffer_index=buffered;
 		return 0;
 	}
 	int oi=0;
 	//one byte of the output is reserved for the terminating NUL
 	for(int i=prev_newline_at;buffer[i]!='\n'&&oi<max_size-1;i++) output[oi++]=buffer[i]; //copy to output buffer
 	if(max_size>0) output[oi]='\0';
 	//keep what follows the last newline; the kept length counts from all the buffered bytes,
 	//not only from the bytes of this recv()
 	const int remaining=buffered-last_newline_at;
 	memmove(buffer,buffer+last_newline_at,remaining);
 	buffer_index=remaining;
 	return 1;
 }
 //Parses a control line of the form "<cmd>=<arguments>".
 //  input:  the received line (it is not modified)
 //  cmd:    the command name that has to stand before the '='
 //  format: sscanf() format of the arguments, the variable arguments are the pointers it stores into
 //Returns the number of the arguments converted by sscanf(); 0 if the line is not the given command or if
 //nothing could be converted, so a non-zero result always means that at least one argument was stored.
 int ctl_get_arg(char* input, const char* cmd, const char* format, ...)
 {
 	const size_t cmdlen=strlen(cmd);
 	//strncmp() stops at the end of input, so input[cmdlen] is only examined when input is long enough
 	if(strncmp(input,cmd,cmdlen)!=0 || input[cmdlen]!='=') return 0;
 	va_list vl;
 	va_start(vl,format);
 	int retval=vsscanf(input+cmdlen+1,format,vl);
 	va_end(vl);
 	//vsscanf() returns EOF (negative) for an empty argument, which must not look like a success
 	return (retval<0)?0:retval;
 }
 //Body of the forked client process, it never returns. Unless decimation is 1, it starts the DSP subprocess of
 //the client, which reads the pipe of the client and writes to pipe_stdout. Then it loops forever: it executes
 //the control commands received on the socket ("bypass=<0|1>", "shift=<rate>") and forwards the samples read
 //from the current input to the socket. The process exits when sending to the socket fails.
 void client()
 {
 	in_client=1;
 	print_client(this_client, "client process forked.");
 	char client_subprocess_cmd_buf[500];
 	int input_fd = this_client->pipefd[0];
 	int pipe_ctl[2] = {-1, -1}, pipe_stdout[2] = {-1, -1};
 	prctl(PR_SET_PDEATHSIG, SIGHUP); //get a signal when parent exits
 	if(decimation!=1)
 	{
 		if(pipe(pipe_ctl)==-1) error_exit(MSG_START "cannot open new pipe() for the client subprocess");
 		if(pipe(pipe_stdout)==-1) error_exit(MSG_START "cannot open new pipe() for the client subprocess");
 		switch(ddc_method)
 		{
 		case M_TD:
 			snprintf(client_subprocess_cmd_buf, sizeof(client_subprocess_cmd_buf), subprocess_cmd_td, pipe_ctl[0], decimation, transition_bw);
 			break;
 		case M_FASTDDC:
 			snprintf(client_subprocess_cmd_buf, sizeof(client_subprocess_cmd_buf), subprocess_args_fastddc_2, pipe_ctl[0], decimation, transition_bw);
 			break;
 		}
 		if(!(client_subprocess_pid = run_subprocess( client_subprocess_cmd_buf, this_client->pipefd, pipe_stdout, &client_subprocess_pgrp)))
 			print_exit(MSG_START "couldn't start client_subprocess_cmd!\n");
 		fprintf(stderr, MSG_START "starting client_subprocess_cmd: %s\n", client_subprocess_cmd_buf);
 		input_fd = pipe_stdout[0]; //we don't have to set it nonblocking
 		fprintf(stderr, MSG_START "pipe_stdout[0] = %d\n", pipe_stdout[0]);
 		write(pipe_ctl[1], "0.0\n", 4);
 	}
 	char recv_cmd[CTL_BUFSIZE];
 	char temps[CTL_BUFSIZE*2];
 	int tempi;
 	float tempf;
 	for(;;)
 	{
 		while(read_socket_ctl(this_client->socket, recv_cmd, CTL_BUFSIZE))
 		{
 			snprintf(temps, sizeof(temps), "read_socket_ctl: %s", recv_cmd);
 			print_client(this_client, temps);
 			//only a positive result means that the argument was really stored
 			if(ctl_get_arg(recv_cmd, "bypass", "%d", &tempi)>0)
 			{
 				if(tempi==1 && client_subprocess_pid)
 				{
 					input_fd=this_client->pipefd[0]; //by doing this, we don't read from pipe_stdout[0] anymore, so that csdr stops doing anything, and also doesn't read anymore from the input: we get the whole I/Q stream!
 				}
 				if(tempi==0 && client_subprocess_pid)
 				{
 					input_fd=pipe_stdout[0];
 				}
 			}
 			//the control pipe exists only if the subprocess was started (decimation != 1)
 			if(ctl_get_arg(recv_cmd, "shift", "%g", &tempf)>0 && client_subprocess_pid)
 			{
 				tempi=snprintf(temps, sizeof(temps), "%g\n", tempf);
 				write(pipe_ctl[1], temps, tempi);
 			}
 		}
 		int nread = read(input_fd,buf,bufsizeall);
 		if(nread<=0) continue;
 		//send() may accept only a part of the data, so it is repeated until the whole buffer is sent
 		int nsent = 0;
 		while(nsent<nread)
 		{
 			int sent_now = send(this_client->socket,buf+nsent,nread-nsent,0);
 			if(sent_now==-1)
 			{
 				print_client(this_client, "client process is exiting.\n");
 				if(client_subprocess_pid) killpg2(client_subprocess_pgrp);
 				exit(0);
 			}
 			nsent += sent_now;
 		}
 	}
 }
 //Sends SIGTERM to the process group pgrp. The group ids 0 and 1 are never signalled, the call does nothing
 //for them.
 void killpg2(pid_t pgrp)
 {
 	//fprintf(stderr, MSG_START "killpg2: %d\n", pgrp);
 	if(pgrp==0 || pgrp==1) return;
 	killpg(pgrp, SIGTERM);
 }
 //Reports a failed library/system call and terminates the process with exit status 1.
 //perror() prints `why`, then a colon and the description of the current errno, then a newline.
 void error_exit(const char* why)
 {
 	const int failure_status = 1;
 	perror(why);
 	exit(failure_status);
 }
 //Writes `why` to stderr exactly as given (nothing is appended) and terminates the process with exit status 1.
 void print_exit(const char* why)
 {
 	const int failure_status = 1;
 	fprintf(stderr, "%s", why);
 	exit(failure_status);
 }
 //Keeps *maxfd one above the highest descriptor seen so far: when fd is not below *maxfd, *maxfd becomes fd+1.
 //main() passes the final value to select() as its first argument.
 void maxfd(int* maxfd, int fd)
 {
 	const int one_past_fd = fd+1;
 	if(*maxfd < one_past_fd)
 	{
 		*maxfd = one_past_fd;
 	}
 }
--- a/ddcd_old.h
+++ b/ddcd_old.h
@ -0,0 +1,62 @@
 #pragma once
 #include <signal.h>
 #include <stdio.h>
 #include <getopt.h>
 #include <string.h>
 #include <stdlib.h>
 #include <sys/socket.h>
 #include <sys/types.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <iostream>
 #include <vector>
 #include <unistd.h>
 #include <fcntl.h>
 #include <errno.h>
 #include <sys/wait.h>
 #include <sys/prctl.h>
 #include <stdarg.h>
 #include <sys/stat.h>
 #include <semaphore.h>
 //Everything the main process keeps about one connected client
 typedef struct client_s
 {
 	struct sockaddr_in addr; //peer address, copied from the address filled in by accept()
 	int socket; //the connected socket returned by accept()
 	pid_t pid; //pid of the client process, as returned by fork()
 	int pipefd[2]; //pipe towards the client process: main writes the samples to [1], the client side reads [0]
 	int error; //non-zero while writing to the pipe of the client fails
 	pid_t dsp_proc; //NOTE(review): not referenced anywhere in the visible code
 } client_t;
 //Body of the forked client process; does not return.
 void client();
 //Reports `why` through perror() and terminates the process with exit status 1.
 void error_exit(const char* why);
 //Prints `why` to stderr and terminates the process with exit status 1.
 void print_exit(const char* why);
 //Logs `what` to stderr together with the address and port of the client.
 void print_client(client_t* client, const char* what);
 //Returns non-zero if a process with the given pid exists (always for pid 0 and 1).
 int proc_exists(pid_t pid);
 //Runs cmd with "bash -c" in its own process group, with the given pipes as stdin/stdout (NULL: unchanged).
 //Returns the pid of the child (0 if fork() failed) and stores its process group id in *pgrp.
 pid_t run_subprocess(char* cmd, int* pipe_in, int* pipe_out, pid_t* pgrp);
 //Raises *maxfd to fd+1 when fd is not below it.
 void maxfd(int* maxfd, int fd);
 //Common signal handler installed by main().
 void sig_handler(int signo);
 //Sends SIGTERM to the process group pgrp, except for the groups 0 and 1.
 void killpg2(pid_t pgrp);
 //Parses "<cmd>=<arguments>" from input with the sscanf() format given; returns the result of vsscanf(),
 //or 0 if input is not the given command.
 int ctl_get_arg(char* input, const char* cmd, const char* format, ...);
 //The DDC method selected with --method
 typedef enum ddc_method_e 
 {
 	M_TD,      //selected by "td" (the default): every client runs the subprocess_cmd_td command line
 	M_FASTDDC  //selected by "fastddc": main runs subprocess_args_fastddc_1, every client runs subprocess_args_fastddc_2
 } ddc_method_t;
 //Command line templates of the DSP subprocesses; they are filled in with sprintf() and run with "bash -c".
 //Per-client command of the "td" method. Arguments: control pipe fd (%d), decimation (%d), transition_bw (%g).
 //The shift function is chosen at compile time by NEON_OPTS.
 const char subprocess_cmd_td[] = "csdr "
 #ifdef NEON_OPTS
 	"shift_addfast_cc"
 #else
 	"shift_unroll_cc"
 #endif
 	" --fd %d | csdr fir_decimate_cc %d %g";
 //Command of the main process for the "fastddc" method. Arguments: decimation (%d), transition_bw (%g).
 const char subprocess_args_fastddc_1[] = "csdr fastddc_fwd_cc %d %g";
 //const char subprocess_args_fastddc_1[] = "csdr through %d %g";
 //Per-client command of the "fastddc" method. Arguments: control pipe fd (%d), decimation (%d), transition_bw (%g).
 const char subprocess_args_fastddc_2[] = "csdr fastddc_inv_cc --fd %d %d %g";
 //const char subprocess_args_fastddc_2[] = "csdr convert_u8_f %d %d %g";
--- a/fastddc.c
+++ b/fastddc.c
@ -0,0 +1,166 @@
 /*
 This software is part of libcsdr, a set of simple DSP routines for 
 Software Defined Radio.
 Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the copyright holder nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 #include "fastddc.h"
 //DDC implementation based on:
 //http://www.3db-labs.com/01598092_MultibandFilterbank.pdf
 inline int is_integer(float a) { return floorf(a) == a; }
 /*
  * Fills `ddc` with every size, decimation and shift parameter of the fast DDC.
  *
  *   ddc            structure to initialize (output)
  *   transition_bw  handed to firdes_filter_len() to obtain the minimal tap count
  *   decimation     total decimation factor, must be >= 1
  *   shift_rate     requested frequency shift; it is mapped onto FFT bins as
  *                  middlebin * (-shift_rate) * 2, i.e. relative to the sampling rate
  *
  * Returns nonzero on error (decimation < 1, or a resulting fft_size <= 2), zero on success.
  * When the decimation check fails, `ddc` is left untouched.
  */
 int fastddc_init(fastddc_t* ddc, float transition_bw, int decimation, float shift_rate)
 {
 	//decimation == 0 would never leave the factor-splitting loop below (0 stays even
 	//forever while pre_decimation overflows), and negative factors are meaningless.
 	if(decimation < 1) return 1;
 	ddc->pre_decimation = 1; //this will be done in the frequency domain
 	ddc->post_decimation = decimation; //this will be done in the time domain
 	//Move factors of two from post_decimation to pre_decimation, but stop as soon as
 	//post_decimation reaches 2 (so one factor of two is kept for the time domain).
 	while( ddc->post_decimation%2 == 0 && ddc->post_decimation/2 != 1)
 	{
 		ddc->post_decimation/=2;
 		ddc->pre_decimation*=2;
 	}
 	ddc->taps_min_length = firdes_filter_len(transition_bw); //this is the minimal number of taps to achieve the given transition_bw; we are likely to have more taps than this number.
 	//taps_min_length is rounded up to a multiple of pre_decimation, passed through next_pow2(), then incremented by one
 	ddc->taps_length = next_pow2(ceil(ddc->taps_min_length/(float)ddc->pre_decimation) * ddc->pre_decimation) + 1;
 	ddc->fft_size = next_pow2(ddc->taps_length * 4); //it is a good rule of thumb for performance (based on the article), but we should do benchmarks
 	while (ddc->fft_size<ddc->pre_decimation) ddc->fft_size*=2; //fft_size should be a multiple of pre_decimation.
 	ddc->overlap_length = ddc->taps_length - 1;
 	ddc->input_size = ddc->fft_size - ddc->overlap_length;
 	ddc->fft_inv_size = ddc->fft_size / ddc->pre_decimation;
 	//Shift operation in the frequency domain: we can shift by a multiple of v.
 	ddc->v = ddc->fft_size/ddc->overlap_length; //overlap factor | +-1 ? (or maybe ceil() this?)
 	int middlebin=ddc->fft_size / 2;
 	ddc->startbin = middlebin + middlebin * (-shift_rate) * 2;
 	//fprintf(stderr, "ddc->startbin=%g\n",(float)ddc->startbin);
 	ddc->startbin = ddc->v * round( ddc->startbin / (float)ddc->v ); //snap to the nearest multiple of v
 	//fprintf(stderr, "ddc->startbin=%g\n",(float)ddc->startbin);
 	ddc->offsetbin = ddc->startbin - middlebin;
 	//post_shift is the part of the shift that the bin-granular pre_shift could not cover
 	ddc->post_shift = (ddc->pre_decimation)*(shift_rate+((float)ddc->offsetbin/ddc->fft_size));
 	ddc->pre_shift = ddc->offsetbin/(float)ddc->fft_size;
 	ddc->dsadata = decimating_shift_addition_init(ddc->post_shift, ddc->post_decimation);
 	//Overlap is scrapped, not added
 	ddc->scrap=ddc->overlap_length/ddc->pre_decimation; //TODO this is problematic sometimes! overlap_length = 401 :: scrap = 200
 	ddc->post_input_size=ddc->fft_inv_size-ddc->scrap;
 	return ddc->fft_size<=2; //returns true on error
 }
 /*
  * Dumps the parameters stored in `ddc` to stderr as a five-line report.
  *   ddc     structure whose fields are printed (only read)
  *   source  label printed at the start of the first line
  * The whole report is produced by one fprintf() call.
  */
 void fastddc_print(fastddc_t* ddc, char* source)
 {
 	fprintf(stderr,
 		"%s: fastddc_print_sizes(): (fft_size = %d) = (taps_length = %d) + (input_size = %d) - 1\n"
 		"  overlap     ::  (overlap_length = %d) = taps_length - 1, taps_min_length = %d\n"
 		"  decimation  ::  decimation = (pre_decimation = %d) * (post_decimation = %d), fft_inv_size = %d\n"
 		"  shift       ::  startbin = %d, offsetbin = %d, v = %d, pre_shift = %g, post_shift = %g\n"
 		"  o&s         ::  post_input_size = %d, scrap = %d\n"
 		, 
 		source, ddc->fft_size, ddc->taps_length, ddc->input_size, 
 		ddc->overlap_length, ddc->taps_min_length,
 		ddc->pre_decimation, ddc->post_decimation, ddc->fft_inv_size,
 		ddc->startbin, ddc->offsetbin, ddc->v, ddc->pre_shift, ddc->post_shift, 
 		ddc->post_input_size, ddc->scrap );
 }
 /*
  * Exchanges, in place, the lower half of `io` with the half that starts at
  * index fft_size/2.
  *   io        buffer of at least fft_size complex samples
  *   fft_size  number of samples to process; with an odd value the last sample stays where it is
  */
 void fft_swap_sides(complexf* io, int fft_size)
 {
 	const int half=fft_size/2;
 	for(int lo=0, hi=half; lo<half; lo++, hi++)
 	{
 		//park the lower sample, pull the upper one down, then store the parked one on top
 		float re=iof(io,lo);
 		float im=qof(io,lo);
 		iof(io,lo)=iof(io,hi);
 		qof(io,lo)=qof(io,hi);
 		iof(io,hi)=re;
 		qof(io,hi)=im;
 	}
 }
 /*
  * Inverse stage of the fast DDC, implemented with the overlap & scrap method.
  *
  *   input         ddc->fft_size frequency-domain samples; NOTE: modified in place
  *                 (its two halves are swapped)
  *   output        receives the result of decimating_shift_addition_cc()
  *   ddc           parameters prepared by fastddc_init()
  *   plan_inverse  inverse FFT plan; its input/output buffers serve as work space
  *   taps_fft      frequency-domain filter taps, indexed like `input`
  *   shift_stat    shift state carried over from the previous call
  *
  * Returns the updated shift state for the next call.
  */
 decimating_shift_addition_status_t fastddc_inv_cc(complexf* input, complexf* output, fastddc_t* ddc, FFT_PLAN_T* plan_inverse, complexf* taps_fft, decimating_shift_addition_status_t shift_stat)
 {
 	//TODO: +/-1s on overlap_size et al
 	complexf* spectrum = plan_inverse->input;
 	complexf* samples = plan_inverse->output;
 	//Start from an all-zero spectrum, the bins below are accumulated into it
 	for(int k=0; k<plan_inverse->size; k++)
 	{
 		iof(spectrum,k)=0;
 		qof(spectrum,k)=0;
 	}
 	//Alias & shift & filter at once
 	fft_swap_sides(input, ddc->fft_size); //TODO this is not very optimal, but now we stick with this slow solution until we got the algorithm working
 	//The destination bin has to be the _center_ of the output spectrum when the source bin is at startbin
 	//(startbin is the _center_ of the band to downconvert, not its first bin), hence the fft_inv_size/2 term.
 	for(int bin=0; bin<ddc->fft_size; bin++)
 	{
 		int dest = (ddc->fft_size+bin-ddc->offsetbin+(ddc->fft_inv_size/2))%plan_inverse->size;
 		//complex multiply-accumulate: (a+b*i)*(c+d*i) = (ac-bd)+(ad+bc)*i
 		//with a+b*i = input[bin] and c+d*i = taps_fft[bin]
 		iof(spectrum,dest) += iof(input,bin) * iof(taps_fft,bin) - qof(input,bin) * qof(taps_fft,bin);
 		qof(spectrum,dest) += iof(input,bin) * qof(taps_fft,bin) + qof(input,bin) * iof(taps_fft,bin);
 	}
 	//Normalize the accumulated bins by pre_decimation (could be merged into the size normalization later)
 	for(int k=0; k<plan_inverse->size; k++)
 	{
 		iof(spectrum,k)/=ddc->pre_decimation;
 		qof(spectrum,k)/=ddc->pre_decimation;
 	}
 	fft_swap_sides(spectrum,plan_inverse->size);
 	fft_execute(plan_inverse);
 	//Normalize the inverse FFT output by the transform size
 	for(int k=0; k<plan_inverse->size; k++)
 	{
 		iof(samples,k)/=plan_inverse->size;
 		qof(samples,k)/=plan_inverse->size;
 	}
 	//Overlap is scrapped, not added: the first ddc->scrap samples are skipped,
 	//the remaining ones go through the shift correction.
 	//(To bypass the shift correction for debugging, copy ddc->post_input_size samples from samples+ddc->scrap to output instead.)
 	return decimating_shift_addition_cc(samples+ddc->scrap, output, ddc->post_input_size, ddc->dsadata, ddc->post_decimation, shift_stat);
 }
--- a/fastddc.h
+++ b/fastddc.h
@ -0,0 +1,29 @@
 #pragma once
 #include <math.h>
 #include "libcsdr.h"
 #include "libcsdr_gpl.h"
 //Parameters of one fast DDC instance, computed by fastddc_init().
 typedef struct fastddc_s
 {
 	int pre_decimation; //power-of-two part of the decimation, done in the frequency domain
 	int post_decimation; //remaining part of the decimation, done in the time domain
 	int taps_length; //number of filter taps actually used
 	int taps_min_length; //minimal number of taps for the requested transition bandwidth
 	int overlap_length; //it is taps_length - 1
 	int fft_size; //size of the forward FFT
 	int fft_inv_size; //fft_size / pre_decimation
 	int input_size; //fft_size - overlap_length
 	int post_input_size; //fft_inv_size - scrap
 	float pre_shift; //offsetbin / fft_size
 	int startbin; //for pre_shift; a multiple of v
 	int v; //step for pre_shift, fft_size / overlap_length
 	int offsetbin; //startbin - fft_size/2
 	float post_shift; //shift handed to decimating_shift_addition_init()
 	int output_scrape; //NOTE(review): not assigned by fastddc_init() - possibly unused, confirm
 	int scrap; //overlap_length / pre_decimation
 	shift_addition_data_t dsadata; //result of decimating_shift_addition_init(post_shift, post_decimation)
 } fastddc_t;
 //Fills *ddc from the requested parameters; returns nonzero on error.
 int fastddc_init(fastddc_t* ddc, float transition_bw, int decimation, float shift_rate);
 //Inverse stage of the fast DDC; modifies `input` in place and returns the updated shift state.
 decimating_shift_addition_status_t fastddc_inv_cc(complexf* input, complexf* output, fastddc_t* ddc, FFT_PLAN_T* plan_inverse, complexf* taps_fft, decimating_shift_addition_status_t shift_stat);
 //Prints the contents of *ddc to stderr, prefixed with `source`.
 void fastddc_print(fastddc_t* ddc, char* source);
 //Swaps the lower and upper halves of the first fft_size samples of `io` in place.
 void fft_swap_sides(complexf* io, int fft_size);
--- a/grc_tests/test_bandpass_fir_fft.grc
+++ b/grc_tests/test_bandpass_fir_fft.grc
--- a/grc_tests/test_fastddc.grc
+++ b/grc_tests/test_fastddc.grc
@ -0,0 +1,996 @@
 <?xml version='1.0' encoding='utf-8'?>
 <?grc format='1' created='3.7.8'?>
 <flow_graph>
  <timestamp>Sat Nov 15 20:06:19 2014</timestamp>
  <block>
    <key>options</key>
    <param>
      <key>author</key>
      <value></value>
    </param>
    <param>
      <key>window_size</key>
      <value>1280, 1024</value>
    </param>
    <param>
      <key>category</key>
      <value>Custom</value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>description</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(10, 10)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>generate_options</key>
      <value>wx_gui</value>
    </param>
    <param>
      <key>id</key>
      <value>top_block</value>
    </param>
    <param>
      <key>max_nouts</key>
      <value>0</value>
    </param>
    <param>
      <key>realtime_scheduling</key>
      <value></value>
    </param>
    <param>
      <key>run_options</key>
      <value>prompt</value>
    </param>
    <param>
      <key>run</key>
      <value>True</value>
    </param>
    <param>
      <key>thread_safe_setters</key>
      <value></value>
    </param>
    <param>
      <key>title</key>
      <value></value>
    </param>
  </block>
  <block>
    <key>variable</key>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(128, 179)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>decimation</value>
    </param>
    <param>
      <key>value</key>
      <value>4</value>
    </param>
  </block>
  <block>
    <key>variable_slider</key>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>converver</key>
      <value>float_converter</value>
    </param>
    <param>
      <key>value</key>
      <value>50</value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(16, 267)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>freq</value>
    </param>
    <param>
      <key>label</key>
      <value></value>
    </param>
    <param>
      <key>max</key>
      <value>samp_rate/2</value>
    </param>
    <param>
      <key>min</key>
      <value>-samp_rate/2</value>
    </param>
    <param>
      <key>notebook</key>
      <value></value>
    </param>
    <param>
      <key>num_steps</key>
      <value>100</value>
    </param>
    <param>
      <key>style</key>
      <value>wx.SL_HORIZONTAL</value>
    </param>
  </block>
  <block>
    <key>variable</key>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(9, 170)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>value</key>
      <value>400000</value>
    </param>
  </block>
  <block>
    <key>analog_noise_source_x</key>
    <param>
      <key>amp</key>
      <value>1</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>0</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(224, 403)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>analog_noise_source_x_0</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>noise_type</key>
      <value>analog.GR_GAUSSIAN</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>seed</key>
      <value>0</value>
    </param>
  </block>
  <block>
    <key>analog_pll_freqdet_cf</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(112, 675)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>analog_pll_freqdet_cf_0</value>
    </param>
    <param>
      <key>w</key>
      <value>(3.141592654/200)/2</value>
    </param>
    <param>
      <key>max_freq</key>
      <value>3.141592654</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>min_freq</key>
      <value>-3.141592654</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
  </block>
  <block>
    <key>analog_sig_source_x</key>
    <param>
      <key>amp</key>
      <value>0.2</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>1</value>
    </param>
    <param>
      <key>freq</key>
      <value>freq</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(224, 29)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>analog_sig_source_x_0</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>offset</key>
      <value>0</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>waveform</key>
      <value>analog.GR_COS_WAVE</value>
    </param>
  </block>
  <block>
    <key>blocks_multiply_const_vxx</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>const</key>
      <value>(samp_rate/decimation)*(1/(2*3.141592654))</value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(368, 691)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>blocks_multiply_const_vxx_0</value>
    </param>
    <param>
      <key>type</key>
      <value>float</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>vlen</key>
      <value>1</value>
    </param>
  </block>
  <block>
    <key>blocks_throttle</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(424, 235)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>blocks_throttle_0</value>
    </param>
    <param>
      <key>ignoretag</key>
      <value>True</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>samples_per_second</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>vlen</key>
      <value>1</value>
    </param>
  </block>
  <block>
    <key>ha5kfu_execproc_xx</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>commandline</key>
      <value>csdr fastddc_fwd_cc %d | csdr fastddc_inv_cc 0.4 %d"%(decimation,decimation)+"</value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(616, 235)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>ha5kfu_execproc_xx_1</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>type</key>
      <value>cc</value>
    </param>
  </block>
  <block>
    <key>notebook</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(160, 283)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>nb</value>
    </param>
    <param>
      <key>labels</key>
      <value>['FFT', 'Scope']</value>
    </param>
    <param>
      <key>notebook</key>
      <value></value>
    </param>
    <param>
      <key>style</key>
      <value>wx.NB_TOP</value>
    </param>
  </block>
  <block>
    <key>wxgui_fftsink2</key>
    <param>
      <key>avg_alpha</key>
      <value>0</value>
    </param>
    <param>
      <key>average</key>
      <value>False</value>
    </param>
    <param>
      <key>baseband_freq</key>
      <value>0</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>fft_size</key>
      <value>1024</value>
    </param>
    <param>
      <key>freqvar</key>
      <value>None</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(952, 155)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_fftsink2_0</value>
    </param>
    <param>
      <key>notebook</key>
      <value>nb, 0</value>
    </param>
    <param>
      <key>peak_hold</key>
      <value>False</value>
    </param>
    <param>
      <key>ref_level</key>
      <value>0</value>
    </param>
    <param>
      <key>ref_scale</key>
      <value>2.0</value>
    </param>
    <param>
      <key>fft_rate</key>
      <value>15</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate/decimation</value>
    </param>
    <param>
      <key>title</key>
      <value>FFT plot of csdr processed signal</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
    <param>
      <key>win</key>
      <value>None</value>
    </param>
    <param>
      <key>y_divs</key>
      <value>10</value>
    </param>
    <param>
      <key>y_per_div</key>
      <value>10</value>
    </param>
  </block>
  <block>
    <key>wxgui_fftsink2</key>
    <param>
      <key>avg_alpha</key>
      <value>0</value>
    </param>
    <param>
      <key>average</key>
      <value>False</value>
    </param>
    <param>
      <key>baseband_freq</key>
      <value>0</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>1</value>
    </param>
    <param>
      <key>fft_size</key>
      <value>1024</value>
    </param>
    <param>
      <key>freqvar</key>
      <value>None</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(616, 291)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_fftsink2_0_0</value>
    </param>
    <param>
      <key>notebook</key>
      <value></value>
    </param>
    <param>
      <key>peak_hold</key>
      <value>False</value>
    </param>
    <param>
      <key>ref_level</key>
      <value>0</value>
    </param>
    <param>
      <key>ref_scale</key>
      <value>2.0</value>
    </param>
    <param>
      <key>fft_rate</key>
      <value>15</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>title</key>
      <value>FFT plot of original signal</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
    <param>
      <key>win</key>
      <value>None</value>
    </param>
    <param>
      <key>y_divs</key>
      <value>10</value>
    </param>
    <param>
      <key>y_per_div</key>
      <value>10</value>
    </param>
  </block>
  <block>
    <key>wxgui_numbersink2</key>
    <param>
      <key>avg_alpha</key>
      <value>0</value>
    </param>
    <param>
      <key>average</key>
      <value>False</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>decimal_places</key>
      <value>10</value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>factor</key>
      <value>1.0</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(576, 619)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_numbersink2_0</value>
    </param>
    <param>
      <key>max_value</key>
      <value>(samp_rate/decimation)/2</value>
    </param>
    <param>
      <key>min_value</key>
      <value>(-samp_rate/decimation)/2</value>
    </param>
    <param>
      <key>notebook</key>
      <value></value>
    </param>
    <param>
      <key>number_rate</key>
      <value>15</value>
    </param>
    <param>
      <key>peak_hold</key>
      <value>False</value>
    </param>
    <param>
      <key>ref_level</key>
      <value>0</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>show_gauge</key>
      <value>True</value>
    </param>
    <param>
      <key>title</key>
      <value>PLL locked at</value>
    </param>
    <param>
      <key>type</key>
      <value>float</value>
    </param>
    <param>
      <key>units</key>
      <value>Hz</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
  </block>
  <block>
    <key>wxgui_scopesink2</key>
    <param>
      <key>ac_couple</key>
      <value>False</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(952, 35)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_scopesink2_0</value>
    </param>
    <param>
      <key>notebook</key>
      <value>nb, 1</value>
    </param>
    <param>
      <key>num_inputs</key>
      <value>1</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate/decimation</value>
    </param>
    <param>
      <key>t_scale</key>
      <value>0</value>
    </param>
    <param>
      <key>title</key>
      <value>Scope plot of csdr processed signal</value>
    </param>
    <param>
      <key>trig_mode</key>
      <value>wxgui.TRIG_MODE_AUTO</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>v_offset</key>
      <value>0</value>
    </param>
    <param>
      <key>v_scale</key>
      <value>0</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
    <param>
      <key>xy_mode</key>
      <value>False</value>
    </param>
    <param>
      <key>y_axis_label</key>
      <value>Counts</value>
    </param>
  </block>
  <connection>
    <source_block_id>analog_noise_source_x_0</source_block_id>
    <sink_block_id>blocks_throttle_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>analog_pll_freqdet_cf_0</source_block_id>
    <sink_block_id>blocks_multiply_const_vxx_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>analog_sig_source_x_0</source_block_id>
    <sink_block_id>blocks_throttle_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>blocks_multiply_const_vxx_0</source_block_id>
    <sink_block_id>wxgui_numbersink2_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>blocks_throttle_0</source_block_id>
    <sink_block_id>ha5kfu_execproc_xx_1</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>blocks_throttle_0</source_block_id>
    <sink_block_id>wxgui_fftsink2_0_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>ha5kfu_execproc_xx_1</source_block_id>
    <sink_block_id>analog_pll_freqdet_cf_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>ha5kfu_execproc_xx_1</source_block_id>
    <sink_block_id>wxgui_fftsink2_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>ha5kfu_execproc_xx_1</source_block_id>
    <sink_block_id>wxgui_scopesink2_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
 </flow_graph>
--- a/grc_tests/test_fractional_decimator.grc
+++ b/grc_tests/test_fractional_decimator.grc
--- a/grc_tests/test_rational_resampler.grc
+++ b/grc_tests/test_rational_resampler.grc
--- a/grc_tests/test_shift.grc
+++ b/grc_tests/test_shift.grc
--- a/grc_tests/test_shift_remote.grc
+++ b/grc_tests/test_shift_remote.grc
@ -0,0 +1,971 @@
 <?xml version='1.0' encoding='utf-8'?>
 <?grc format='1' created='3.7.8'?>
 <flow_graph>
  <timestamp>Thu Jan 15 18:51:48 2015</timestamp>
  <block>
    <key>options</key>
    <param>
      <key>author</key>
      <value></value>
    </param>
    <param>
      <key>window_size</key>
      <value>1280, 1024</value>
    </param>
    <param>
      <key>category</key>
      <value>Custom</value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>description</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(10, 10)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>generate_options</key>
      <value>wx_gui</value>
    </param>
    <param>
      <key>id</key>
      <value>top_block</value>
    </param>
    <param>
      <key>max_nouts</key>
      <value>0</value>
    </param>
    <param>
      <key>realtime_scheduling</key>
      <value></value>
    </param>
    <param>
      <key>run_options</key>
      <value>prompt</value>
    </param>
    <param>
      <key>run</key>
      <value>True</value>
    </param>
    <param>
      <key>thread_safe_setters</key>
      <value></value>
    </param>
    <param>
      <key>title</key>
      <value></value>
    </param>
  </block>
  <block>
    <key>variable_slider</key>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>converver</key>
      <value>float_converter</value>
    </param>
    <param>
      <key>value</key>
      <value>0</value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(24, 331)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>gen_freq</value>
    </param>
    <param>
      <key>label</key>
      <value>Frequency:</value>
    </param>
    <param>
      <key>max</key>
      <value>samp_rate/2</value>
    </param>
    <param>
      <key>min</key>
      <value>-samp_rate/2</value>
    </param>
    <param>
      <key>notebook</key>
      <value></value>
    </param>
    <param>
      <key>num_steps</key>
      <value>100</value>
    </param>
    <param>
      <key>style</key>
      <value>wx.SL_HORIZONTAL</value>
    </param>
  </block>
  <block>
    <key>variable</key>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(8, 195)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>rate</value>
    </param>
    <param>
      <key>value</key>
      <value>-0.1</value>
    </param>
  </block>
  <block>
    <key>variable</key>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(176, 11)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>value</key>
      <value>250e3</value>
    </param>
  </block>
  <block>
    <key>analog_sig_source_x</key>
    <param>
      <key>amp</key>
      <value>1</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>freq</key>
      <value>gen_freq</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(8, 75)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>analog_sig_source_x_0</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>offset</key>
      <value>0</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>waveform</key>
      <value>analog.GR_SIN_WAVE</value>
    </param>
  </block>
  <block>
    <key>blocks_throttle</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(224, 107)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>blocks_throttle_0</value>
    </param>
    <param>
      <key>ignoretag</key>
      <value>True</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>samples_per_second</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>vlen</key>
      <value>1</value>
    </param>
  </block>
  <block>
    <key>ha5kfu_execproc_xx</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>commandline</key>
      <value>"csdr shift_addition_cc %g"%rate</value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(824, 315)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>ha5kfu_execproc_xx_0_0</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>type</key>
      <value>cc</value>
    </param>
  </block>
  <block>
    <key>ha5kfu_execproc_xx</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>commandline</key>
      <value>ncat -v raspberrypi.local 5321</value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(536, 443)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>id</key>
      <value>ha5kfu_execproc_xx_0_0_0_1</value>
    </param>
    <param>
      <key>maxoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>minoutbuf</key>
      <value>0</value>
    </param>
    <param>
      <key>type</key>
      <value>cc</value>
    </param>
  </block>
  <block>
    <key>notebook</key>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(272, 11)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>nb0</value>
    </param>
    <param>
      <key>labels</key>
      <value>['original', 'shift_addition_cc','shift_addfast_cc',]</value>
    </param>
    <param>
      <key>notebook</key>
      <value></value>
    </param>
    <param>
      <key>style</key>
      <value>wx.NB_TOP</value>
    </param>
  </block>
  <block>
    <key>wxgui_fftsink2</key>
    <param>
      <key>avg_alpha</key>
      <value>0</value>
    </param>
    <param>
      <key>average</key>
      <value>False</value>
    </param>
    <param>
      <key>baseband_freq</key>
      <value>0</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>fft_size</key>
      <value>1024</value>
    </param>
    <param>
      <key>freqvar</key>
      <value>None</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(848, 27)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_fftsink2_0_0</value>
    </param>
    <param>
      <key>notebook</key>
      <value>nb0,0</value>
    </param>
    <param>
      <key>peak_hold</key>
      <value>False</value>
    </param>
    <param>
      <key>ref_level</key>
      <value>0</value>
    </param>
    <param>
      <key>ref_scale</key>
      <value>2.0</value>
    </param>
    <param>
      <key>fft_rate</key>
      <value>15</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>title</key>
      <value>FFT Plot</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
    <param>
      <key>win</key>
      <value>None</value>
    </param>
    <param>
      <key>y_divs</key>
      <value>10</value>
    </param>
    <param>
      <key>y_per_div</key>
      <value>10</value>
    </param>
  </block>
  <block>
    <key>wxgui_fftsink2</key>
    <param>
      <key>avg_alpha</key>
      <value>0</value>
    </param>
    <param>
      <key>average</key>
      <value>False</value>
    </param>
    <param>
      <key>baseband_freq</key>
      <value>0</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>fft_size</key>
      <value>1024</value>
    </param>
    <param>
      <key>freqvar</key>
      <value>None</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(1112, 339)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_fftsink2_0_1</value>
    </param>
    <param>
      <key>notebook</key>
      <value>nb0,1</value>
    </param>
    <param>
      <key>peak_hold</key>
      <value>False</value>
    </param>
    <param>
      <key>ref_level</key>
      <value>0</value>
    </param>
    <param>
      <key>ref_scale</key>
      <value>2.0</value>
    </param>
    <param>
      <key>fft_rate</key>
      <value>15</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>title</key>
      <value>FFT Plot</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
    <param>
      <key>win</key>
      <value>None</value>
    </param>
    <param>
      <key>y_divs</key>
      <value>10</value>
    </param>
    <param>
      <key>y_per_div</key>
      <value>10</value>
    </param>
  </block>
  <block>
    <key>wxgui_fftsink2</key>
    <param>
      <key>avg_alpha</key>
      <value>0</value>
    </param>
    <param>
      <key>average</key>
      <value>False</value>
    </param>
    <param>
      <key>baseband_freq</key>
      <value>0</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>fft_size</key>
      <value>1024</value>
    </param>
    <param>
      <key>freqvar</key>
      <value>None</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(808, 387)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_fftsink2_0_1_1</value>
    </param>
    <param>
      <key>notebook</key>
      <value>nb0,2</value>
    </param>
    <param>
      <key>peak_hold</key>
      <value>False</value>
    </param>
    <param>
      <key>ref_level</key>
      <value>0</value>
    </param>
    <param>
      <key>ref_scale</key>
      <value>2.0</value>
    </param>
    <param>
      <key>fft_rate</key>
      <value>15</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>title</key>
      <value>FFT Plot</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
    <param>
      <key>win</key>
      <value>None</value>
    </param>
    <param>
      <key>y_divs</key>
      <value>10</value>
    </param>
    <param>
      <key>y_per_div</key>
      <value>10</value>
    </param>
  </block>
  <block>
    <key>wxgui_scopesink2</key>
    <param>
      <key>ac_couple</key>
      <value>False</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(1112, 555)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_scopesink2_0_0</value>
    </param>
    <param>
      <key>notebook</key>
      <value>nb0,1</value>
    </param>
    <param>
      <key>num_inputs</key>
      <value>1</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>t_scale</key>
      <value>0</value>
    </param>
    <param>
      <key>title</key>
      <value>Scope Plot</value>
    </param>
    <param>
      <key>trig_mode</key>
      <value>wxgui.TRIG_MODE_NORM</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>v_offset</key>
      <value>0</value>
    </param>
    <param>
      <key>v_scale</key>
      <value>0</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
    <param>
      <key>xy_mode</key>
      <value>False</value>
    </param>
    <param>
      <key>y_axis_label</key>
      <value>Counts</value>
    </param>
  </block>
  <block>
    <key>wxgui_scopesink2</key>
    <param>
      <key>ac_couple</key>
      <value>False</value>
    </param>
    <param>
      <key>alias</key>
      <value></value>
    </param>
    <param>
      <key>comment</key>
      <value></value>
    </param>
    <param>
      <key>affinity</key>
      <value></value>
    </param>
    <param>
      <key>_enabled</key>
      <value>True</value>
    </param>
    <param>
      <key>_coordinate</key>
      <value>(808, 611)</value>
    </param>
    <param>
      <key>_rotation</key>
      <value>0</value>
    </param>
    <param>
      <key>grid_pos</key>
      <value></value>
    </param>
    <param>
      <key>id</key>
      <value>wxgui_scopesink2_0_0_1</value>
    </param>
    <param>
      <key>notebook</key>
      <value>nb0,2</value>
    </param>
    <param>
      <key>num_inputs</key>
      <value>1</value>
    </param>
    <param>
      <key>samp_rate</key>
      <value>samp_rate</value>
    </param>
    <param>
      <key>t_scale</key>
      <value>0</value>
    </param>
    <param>
      <key>title</key>
      <value>Scope Plot</value>
    </param>
    <param>
      <key>trig_mode</key>
      <value>wxgui.TRIG_MODE_NORM</value>
    </param>
    <param>
      <key>type</key>
      <value>complex</value>
    </param>
    <param>
      <key>v_offset</key>
      <value>0</value>
    </param>
    <param>
      <key>v_scale</key>
      <value>0</value>
    </param>
    <param>
      <key>win_size</key>
      <value></value>
    </param>
    <param>
      <key>xy_mode</key>
      <value>False</value>
    </param>
    <param>
      <key>y_axis_label</key>
      <value>Counts</value>
    </param>
  </block>
  <connection>
    <source_block_id>analog_sig_source_x_0</source_block_id>
    <sink_block_id>blocks_throttle_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>blocks_throttle_0</source_block_id>
    <sink_block_id>ha5kfu_execproc_xx_0_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>blocks_throttle_0</source_block_id>
    <sink_block_id>ha5kfu_execproc_xx_0_0_0_1</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>blocks_throttle_0</source_block_id>
    <sink_block_id>wxgui_fftsink2_0_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>ha5kfu_execproc_xx_0_0</source_block_id>
    <sink_block_id>wxgui_fftsink2_0_1</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>ha5kfu_execproc_xx_0_0</source_block_id>
    <sink_block_id>wxgui_scopesink2_0_0</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>ha5kfu_execproc_xx_0_0_0_1</source_block_id>
    <sink_block_id>wxgui_fftsink2_0_1_1</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
  <connection>
    <source_block_id>ha5kfu_execproc_xx_0_0_0_1</source_block_id>
    <sink_block_id>wxgui_scopesink2_0_0_1</sink_block_id>
    <source_key>0</source_key>
    <sink_key>0</sink_key>
  </connection>
 </flow_graph>
--- a/grc_tests/test_shift_remote.sh
+++ b/grc_tests/test_shift_remote.sh
@ -0,0 +1,9 @@
 #!/bin/sh
 # Run this script on a Raspberry Pi 2, while running test_shift_remote.grc on your PC.
 # It allows you to debug the NEON-accelerated version of specific DSP algorithms on the target hardware.
 #
 # What it does: writes a two-line helper script that runs `csdr shift_addfast_cc -0.1`,
 # prints it, makes it executable, and then lets ncat listen on TCP port 5321 with the
 # helper attached via -e. The helper is deleted after ncat exits.
 TEMPSCRIPT="/tmp/test_shift_remote_exec.sh"
 # printf is used instead of echo: whether echo expands "\n" is implementation-defined,
 # so with some shells the helper would become one line containing a literal backslash-n.
 printf '#!/bin/sh\ncsdr shift_addfast_cc -0.1\n' > "$TEMPSCRIPT"
 cat "$TEMPSCRIPT"
 chmod +x "$TEMPSCRIPT"
 ncat -vvl 5321 -e "$TEMPSCRIPT"
 rm "$TEMPSCRIPT"
--- a/libcsdr.c
+++ b/libcsdr.c
@ -263,8 +263,208 @@ float shift_table_cc(complexf* input, complexf* output, int input_size, float ra
 	return phase;
 }
 shift_unroll_data_t shift_unroll_init(float rate, int size)
 {
 	//Precomputes sine/cosine tables with `size` entries each for shift_unroll_cc.
 	//Entry k holds sin/cos of the phase accumulated after (k+1) steps of
 	//phase_increment = 2*rate*PI, wrapped back towards [-PI, PI] after every step.
 	//Both tables are allocated with malloc() here; the allocations are not checked.
 	shift_unroll_data_t tables;
 	tables.phase_increment = 2*rate*PI;
 	tables.size = size;
 	tables.dsin = (float*)malloc(sizeof(float)*size);
 	tables.dcos = (float*)malloc(sizeof(float)*size);
 	float acc_phase = 0;
 	int k = 0;
 	while(k < size)
 	{
 		acc_phase += tables.phase_increment;
 		//wrap the accumulated phase so it does not grow without bound
 		while(acc_phase > PI) acc_phase -= 2*PI;
 		while(acc_phase < -PI) acc_phase += 2*PI;
 		tables.dsin[k] = sin(acc_phase);
 		tables.dcos[k] = cos(acc_phase);
 		k++;
 	}
 	return tables;
 }
 float shift_unroll_cc(complexf *input, complexf* output, int input_size, shift_unroll_data_t* d, float starting_phase)
 {
 	//Rotates every input sample by (starting_phase + table phase) and writes it to output.
 	//The rotation for sample n is built with the angle-addition identities from
 	//cos/sin(starting_phase) and the precomputed d->dcos[n], d->dsin[n], so the tables
 	//must hold at least input_size entries (they are indexed up to input_size-1).
 	//Returns the phase to pass as starting_phase for the next call.
 	const float base_cos = cos(starting_phase);
 	const float base_sin = sin(starting_phase);
 	for(int n=0; n<input_size; n++) //@shift_unroll_cc
 	{
 		const float table_cos = d->dcos[n];
 		const float table_sin = d->dsin[n];
 		const float rot_cos = base_cos * table_cos - base_sin * table_sin;
 		const float rot_sin = base_sin * table_cos + base_cos * table_sin;
 		//complex multiply: (I + jQ) * (rot_cos + j*rot_sin)
 		iof(output,n) = rot_cos*iof(input,n) - rot_sin*qof(input,n);
 		qof(output,n) = rot_sin*iof(input,n) + rot_cos*qof(input,n);
 	}
 	//advance the phase by the whole block and wrap it back towards [-PI, PI]
 	float next_phase = starting_phase + input_size*d->phase_increment;
 	while(next_phase > PI) next_phase -= 2*PI;
 	while(next_phase < -PI) next_phase += 2*PI;
 	return next_phase;
 }
 shift_addfast_data_t shift_addfast_init(float rate)
 {
 	//Prepares the 4-entry sine/cosine tables used by shift_addfast_cc.
 	//phase_increment is 2*rate*PI; table slot (k-1) holds sin/cos of k*phase_increment
 	//for k = 1..4, i.e. the rotation of each of the 4 samples in a group relative to
 	//the phase at the start of that group.
 	shift_addfast_data_t tbl;
 	tbl.phase_increment = 2*rate*PI;
 	for(int k=1; k<=4; k++)
 	{
 		tbl.dsin[k-1] = sin(tbl.phase_increment*k);
 		tbl.dcos[k-1] = cos(tbl.phase_increment*k);
 	}
 	return tbl;
 }
 #ifdef NEON_OPTS
-#pragma message "We have a faster fir_decimate_cc now."
+#pragma message "Manual NEON optimizations are ON: we have a faster shift_addfast_cc now."
 float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_addfast_data_t* d, float starting_phase)
 {
 	//NEON implementation: rotates the input samples 4 at a time.
 	//For each group of 4 samples the rotation (cos_vals, sin_vals) is computed from the
 	//phase at the start of the group (cos_start, sin_start) and the 4-entry tables
 	//d->dcos / d->dsin; the last lane then becomes the start phase of the next group.
 	//Returns the phase to pass as starting_phase for the next call.
 	//
 	//input_size should be multiple of 4: the loop consumes 4 samples per iteration and
 	//only stops once pinput has reached pinput_end, so any other positive size makes it
 	//read and write past the end of the buffers.
 	float cos_start[4], sin_start[4];
 	for(int i=0;i<4;i++)
 	{
 		cos_start[i] = cos(starting_phase);
 		sin_start[i] = sin(starting_phase);
 	}
 	float* pdcos = d->dcos;
 	float* pdsin = d->dsin;
 	register float* pinput = (float*)input;
 	register float* pinput_end = (float*)(input+input_size);
 	register float* poutput = (float*)output;
 	//Register map:
 	#define RDCOS "q0" //dcos, dsin
 	#define RDSIN "q1"
 	#define RCOSST "q2" //cos_start, sin_start
 	#define RSINST "q3"
 	#define RCOSV "q4" //cos_vals, sin_vals
 	#define RSINV "q5"
 	#define ROUTI "q6" //output_i, output_q
 	#define ROUTQ "q7"
 	#define RINPI "q8" //input_i, input_q
 	#define RINPQ "q9"
 	#define R3(x,y,z) x ", " y ", " z "\n\t"
 	if(input_size > 0) //the asm loop tests its condition at the bottom, so it must not be entered for an empty input
 	{
 		asm volatile( //(the range of q is q0-q15)
 			"		vld1.32	{" RDCOS "}, [%[pdcos]]\n\t"
 			"		vld1.32	{" RDSIN "}, [%[pdsin]]\n\t"
 			"		vld1.32	{" RCOSST "}, [%[cos_start]]\n\t"
 			"		vld1.32	{" RSINST "}, [%[sin_start]]\n\t"
 			//A numeric local label is used so that the asm can be emitted more than once without a duplicate symbol.
 			"1:		vld2.32 {" RINPI "-" RINPQ "}, [%[pinput]]!\n\t" //load RINPI and RINPQ directly from the memory address stored in pinput, with interleaving (so that we get the I samples in RINPI and the Q samples in RINPQ), also increment the memory address in pinput (hence the "!" mark)
 			//C version:
 			//cos_vals[j] = cos_start * d->dcos[j] - sin_start * d->dsin[j];
 			//sin_vals[j] = sin_start * d->dcos[j] + cos_start * d->dsin[j];
 			"		vmul.f32 " R3(RCOSV, RCOSST, RDCOS)  //cos_vals[i] = cos_start * d->dcos[i]
 			"		vmls.f32 " R3(RCOSV, RSINST, RDSIN)  //cos_vals[i] -= sin_start * d->dsin[i]
 			"		vmul.f32 " R3(RSINV, RSINST, RDCOS)  //sin_vals[i] = sin_start * d->dcos[i]
 			"		vmla.f32 " R3(RSINV, RCOSST, RDSIN)  //sin_vals[i] += cos_start * d->dsin[i]
 			//C version:
 			//iof(output,4*i+j)=cos_vals[j]*iof(input,4*i+j)-sin_vals[j]*qof(input,4*i+j);
 			//qof(output,4*i+j)=sin_vals[j]*iof(input,4*i+j)+cos_vals[j]*qof(input,4*i+j);
 			"		vmul.f32 " R3(ROUTI, RCOSV, RINPI) //output_i =  cos_vals * input_i
 			"		vmls.f32 " R3(ROUTI, RSINV, RINPQ) //output_i -= sin_vals * input_q
 			"		vmul.f32 " R3(ROUTQ, RSINV, RINPI) //output_q =  sin_vals * input_i
 			"		vmla.f32 " R3(ROUTQ, RCOSV, RINPQ) //output_q += cos_vals * input_q
 			"		vst2.32 {" ROUTI "-" ROUTQ "}, [%[poutput]]!\n\t" //store the outputs in memory (interleaved again), and increment poutput
 			"		vdup.32 " RCOSST ", d9[1]\n\t" // cos_start[0-3] = cos_vals[3] (d9 is the upper half of q4)
 			"		vdup.32 " RSINST ", d11[1]\n\t" // sin_start[0-3] = sin_vals[3] (d11 is the upper half of q5)
 			"		cmp %[pinput], %[pinput_end]\n\t" //if(pinput < pinput_end) (unsigned compare)
 			"		bcc 1b\n\t"						  //	then process the next 4 samples
 		:
 			[pinput]"+r"(pinput), [poutput]"+r"(poutput) //output operand list -> C variables that we will change from ASM
 		:
 			[pinput_end]"r"(pinput_end), [pdcos]"r"(pdcos), [pdsin]"r"(pdsin), [sin_start]"r"(sin_start), [cos_start]"r"(cos_start) //input operand list
 		:
 			"memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "cc" //clobber list: every q register named in the register map above
 		);
 	}
 	starting_phase+=input_size*d->phase_increment;
 	while(starting_phase>PI) starting_phase-=2*PI;
 	while(starting_phase<-PI) starting_phase+=2*PI;
 	return starting_phase;
 }
 #else
 #if 1
 //SADF_L1(j): rotation for lane j of the current group, obtained from the phase at the
 //start of the group (cos_start, sin_start) and the table entry j via angle addition.
 #define SADF_L1(j) \
 	cos_vals_ ## j = cos_start * dcos_ ## j - sin_start * dsin_ ## j; \
 	sin_vals_ ## j = sin_start * dcos_ ## j + cos_start * dsin_ ## j;
 //SADF_L2(j): complex-multiplies sample 4*i+j of input by the lane-j rotation and stores it in output.
 #define SADF_L2(j) \
 	iof(output,4*i+j)=(cos_vals_ ## j)*iof(input,4*i+j)-(sin_vals_ ## j)*qof(input,4*i+j); \
 	qof(output,4*i+j)=(sin_vals_ ## j)*iof(input,4*i+j)+(cos_vals_ ## j)*qof(input,4*i+j);
 float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_addfast_data_t* d, float starting_phase)
 {
 	//Manually unrolled C version: processes input_size/4 groups of 4 samples.
 	//input_size should be multiple of 4; samples beyond the last full group are not written.
 	//Returns the phase to pass as starting_phase for the next call.
 	register float dcos_0 = d->dcos[0];
 	register float dcos_1 = d->dcos[1];
 	register float dcos_2 = d->dcos[2];
 	register float dcos_3 = d->dcos[3];
 	register float dsin_0 = d->dsin[0];
 	register float dsin_1 = d->dsin[1];
 	register float dsin_2 = d->dsin[2];
 	register float dsin_3 = d->dsin[3];
 	register float cos_vals_0, cos_vals_1, cos_vals_2, cos_vals_3;
 	register float sin_vals_0, sin_vals_1, sin_vals_2, sin_vals_3;
 	float cos_start = cos(starting_phase);
 	float sin_start = sin(starting_phase);
 	const int group_count = input_size/4;
 	for(int i=0; i<group_count; i++) //@shift_addfast_cc
 	{
 		//first the 4 rotations of this group...
 		SADF_L1(0)
 		SADF_L1(1)
 		SADF_L1(2)
 		SADF_L1(3)
 		//...then the 4 rotated output samples
 		SADF_L2(0)
 		SADF_L2(1)
 		SADF_L2(2)
 		SADF_L2(3)
 		//the rotation of the last lane is the starting point of the next group
 		sin_start = sin_vals_3;
 		cos_start = cos_vals_3;
 	}
 	float next_phase = starting_phase + input_size*d->phase_increment;
 	while(next_phase > PI) next_phase -= 2*PI;
 	while(next_phase < -PI) next_phase += 2*PI;
 	return next_phase;
 }
 #else
 float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_addfast_data_t* d, float starting_phase)
 {
 	//Plain C version with inner loops instead of manual unrolling.
 	//Processes input_size/4 groups of 4 samples (input_size should be multiple of 4) and
 	//returns the phase to pass as starting_phase for the next call.
 	float rot_cos[4], rot_sin[4];
 	float group_cos = cos(starting_phase);
 	float group_sin = sin(starting_phase);
 	const int group_count = input_size/4;
 	for(int g=0; g<group_count; g++) //@shift_addfast_cc
 	{
 		complexf* src = input + 4*g;  //first sample of this group
 		complexf* dst = output + 4*g;
 		for(int j=0;j<4;j++) //@shift_addfast_cc
 		{
 			//rotation of lane j = phase at group start advanced by table entry j
 			rot_cos[j] = group_cos * d->dcos[j] - group_sin * d->dsin[j];
 			rot_sin[j] = group_sin * d->dcos[j] + group_cos * d->dsin[j];
 		}
 		for(int j=0;j<4;j++) //@shift_addfast_cc
 		{
 			iof(dst,j) = rot_cos[j]*iof(src,j) - rot_sin[j]*qof(src,j);
 			qof(dst,j) = rot_sin[j]*iof(src,j) + rot_cos[j]*qof(src,j);
 		}
 		//the last lane is the starting point of the next group
 		group_cos = rot_cos[3];
 		group_sin = rot_sin[3];
 	}
 	float next_phase = starting_phase + input_size*d->phase_increment;
 	while(next_phase > PI) next_phase -= 2*PI;
 	while(next_phase < -PI) next_phase += 2*PI;
 	return next_phase;
 }
 #endif
 #endif
 #ifdef NEON_OPTS
 #pragma message "Manual NEON optimizations are ON: we have a faster fir_decimate_cc now."
 //max help: http://community.arm.com/groups/android-community/blog/2015/03/27/arm-neon-programming-quick-reference
@ -280,11 +480,7 @@ int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decim
 	for(int i=0; i<input_size; i+=decimation) //@fir_decimate_cc: outer loop
 	{
 		if(i+taps_length>input_size) break;
-		register float acci=0;
+		register float* pinput=(float*)&(input[i]);
 		register float accq=0;
 		register int ti=0;
 		register float* pinput=(float*)&(input[i+ti]);
 		register float* ptaps=taps;
 		register float* ptaps_end=taps+taps_length;
 		float quad_acciq [8];
@ -297,13 +493,13 @@ q4, q5: accumulator for I branch and Q branch (will be the output)
 */
 		asm volatile(
-			"		vmov.f32 q4, #0.0\n\t" //another way to null the accumulators
+			"		veor q4, q4\n\t"
-			"		vmov.f32 q5, #0.0\n\t"
+			"		veor q5, q5\n\t"
 			"for_fdccasm: vld2.32	{q0-q1}, [%[pinput]]!\n\t" //load q0 and q1 directly from the memory address stored in pinput, with interleaving (so that we get the I samples in q0 and the Q samples in q1), also increment the memory address in pinput (hence the "!" mark) //http://community.arm.com/groups/processors/blog/2010/03/17/coding-for-neon--part-1-load-and-stores
 			"		vld1.32	{q2}, [%[ptaps]]!\n\t"
 			"		vmla.f32 q4, q0, q2\n\t" //quad_acc_i += quad_input_i * quad_taps_1 //http://stackoverflow.com/questions/3240440/how-to-use-the-multiply-and-accumulate-intrinsics-in-arm-cortex-a8 //http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0489e/CIHEJBIE.html
 			"		vmla.f32 q5, q1, q2\n\t" //quad_acc_q += quad_input_q * quad_taps_1
-			"		cmp %[ptaps], %[ptaps_end]\n\t" //if(ptaps == ptaps_end)
+			"		cmp %[ptaps], %[ptaps_end]\n\t" //if(ptaps != ptaps_end)
 			"		bcc for_fdccasm\n\t"			//	then goto for_fdcasm
 			"		vst1.32 {q4}, [%[quad_acci]]\n\t" //if the loop is finished, store the two accumulators in memory
 			"		vst1.32 {q5}, [%[quad_accq]]\n\t"
@ -454,7 +650,7 @@ float inline fir_one_pass_ff(float* input, float* taps, int taps_length)
 	return acc;
 }
-fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, fractional_decimator_ff_t d)
+old_fractional_decimator_ff_t old_fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, old_fractional_decimator_ff_t d)
 {
 	if(rate<=1.0) return d; //sanity check, can't decimate <=1.0
 	//This routine can handle floating point decimation rates.
@ -487,6 +683,104 @@ fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, i
 	return d;
 }
 fractional_decimator_ff_t fractional_decimator_ff_init(float rate, int num_poly_points, float* taps, int taps_length)
 {
 	fractional_decimator_ff_t d;
 	d.num_poly_points = num_poly_points&~1; //num_poly_points needs to be even!
 	d.poly_precalc_denomiator = (float*)malloc(d.num_poly_points*sizeof(float));
 	//x0..x3
 	//-1,0,1,2
 	//-(4/2)+1
 	//x0..x5
 	//-2,-1,0,1,2,3
 	d.xifirst=-(num_poly_points/2)+1, d.xilast=num_poly_points/2;
 	int id = 0; //index in poly_precalc_denomiator
 	for(int xi=d.xifirst;xi<=d.xilast;xi++)
 	{
 		d.poly_precalc_denomiator[id]=1;
 		for(int xj=d.xifirst;xj<=d.xilast;xj++)
 		{
 			if(xi!=xj) d.poly_precalc_denomiator[id] *= (xi-xj); //poly_precalc_denomiator could be integer as well. But that would later add a necessary conversion.
 		}
 		id++;
 	}
 	d.where=-d.xifirst;
 	d.coeffs_buf=(float*)malloc(d.num_poly_points*sizeof(float)); 
 	d.filtered_buf=(float*)malloc(d.num_poly_points*sizeof(float)); 
 	//d.last_inputs_circbuf = (float)malloc(d.num_poly_points*sizeof(float));
 	//d.last_inputs_startsat = 0; 
 	//d.last_inputs_samplewhere = -1;
 	//for(int i=0;i<num_poly_points; i++) d.last_inputs_circbuf[i] = 0;
 	d.rate = rate;
 	d.taps = taps;
 	d.taps_length = taps_length;
 	d.input_processed = 0;
 	return d;
 }
 //When non-zero, the sanity checks in fractional_decimator_ff() are evaluated through assert().
 #define DEBUG_ASSERT 1
 /*
  * Decimates a block of real samples by the (possibly non-integer) rate stored in d.
  *   input, input_size: input samples and their count
  *   output:            receives the decimated samples; the number written is stored in d->output_size
  *   d:                 state created by fractional_decimator_ff_init(); where, input_processed and
  *                      output_size are updated on every call
  * Each output sample is a polynomial interpolation over d->num_poly_points (optionally pre-filtered)
  * input samples: coeffs_buf[] holds the numerator products and poly_precalc_denomiator[] the
  * matching denominators.
  */
 void fractional_decimator_ff(float* input, float* output, int input_size, fractional_decimator_ff_t* d)
 {
 	//This routine can handle floating point decimation rates.
 	//It applies polynomial interpolation to samples that are taken into consideration from a pre-filtered input.
 	//The pre-filter can be switched off by applying taps=NULL.
 	//fprintf(stderr, "drate=%f\n", d->rate);
 	if(DEBUG_ASSERT) assert(d->rate > 1.0); 
 	if(DEBUG_ASSERT) assert(d->where >= -d->xifirst);
 	int oi=0; //output index
 	int index_high; 
 //FD_INDEX_LOW is the integer sample index just below index_high; note that the macro stays defined after this function.
 #define FD_INDEX_LOW (index_high-1)
 	//we optimize to calculate ceilf(where) only once every iteration, so we do it here:
 	//(the loop condition assigns index_high, and the code after the loop relies on the value from the last, failing check)
 	for(;(index_high=ceilf(d->where))+d->num_poly_points+d->taps_length<input_size;d->where+=d->rate) //@fractional_decimator_ff
 	{
 		//d->num_poly_points above is theoretically more than we could have here, but this makes the spectrum look good
 		//NOTE(review): sxifirst and sxilast are computed but never read in this function.
 		int sxifirst = FD_INDEX_LOW + d->xifirst; 
 		int sxilast = FD_INDEX_LOW + d->xilast; 
 		//Collect the num_poly_points samples starting at FD_INDEX_LOW, either through the FIR pre-filter or as they are.
 		if(d->taps) 
 			for(int wi=0;wi<d->num_poly_points;wi++) d->filtered_buf[wi] = fir_one_pass_ff(input+FD_INDEX_LOW+wi, d->taps, d->taps_length);
 		else
 			for(int wi=0;wi<d->num_poly_points;wi++) d->filtered_buf[wi] = *(input+FD_INDEX_LOW+wi);
 		int id=0;
 		//Position of the wanted output sample relative to FD_INDEX_LOW; as index_high=ceilf(where), this is within (0,1].
 		float xwhere = d->where - FD_INDEX_LOW;
 		//Numerator of every interpolation term: product of (xwhere-xj) over all nodes xj except xi.
 		for(int xi=d->xifirst;xi<=d->xilast;xi++)
 		{
 			d->coeffs_buf[id]=1;
 			for(int xj=d->xifirst;xj<=d->xilast;xj++)
 			{
 				if(xi!=xj) d->coeffs_buf[id] *= (xwhere-xj);
 			}
 			id++;		
 		}
 		float acc = 0;
 		for(int i=0;i<d->num_poly_points;i++)
 		{
 			acc += (d->coeffs_buf[i]/d->poly_precalc_denomiator[i])*d->filtered_buf[i];  //(xnom/xden)*yn
 		}
 		output[oi++]=acc;
 	}
 	//index_high now belongs to the first position that could not be processed with this input block.
 	//where is rebased by the same amount that is reported in input_processed
 	//(presumably so that the caller drops that many input samples before the next call -- TODO confirm against the caller).
 	d->input_processed = FD_INDEX_LOW + d->xifirst;
 	d->where -= d->input_processed;
 	d->output_size = oi;
 }
 /*
 * Some notes to myself on the circular buffer I wanted to implement here:
 		int last_input_samplewhere_shouldbe = (index_high-1)+xifirst;
 		int last_input_offset = last_input_samplewhere_shouldbe - d->last_input_samplewhere;
 		if(last_input_offset < num_poly_points)
 		{
 			//if we can move the last_input circular buffer, we move, and add the new samples at the end
 			d->last_inputs_startsat += last_input_offset;
 			d->last_inputs_startsat %= num_poly_points;
 			int num_copied_samples = 0;
 			for(int i=0; i<last_input_offset; i++)
 			{
 				d->last_inputs_circbuf[i]=
 			}
 			d->last_input_samplewhere = d->las
 		}
 	However, I think I should just rather do a continuous big buffer.
 */
 void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps_fft, complexf* last_overlap, int overlap_size)
 {
@ -930,6 +1224,29 @@ void apply_window_c(complexf* input, complexf* output, int size, window_t window
 	}
 }
 /*
  * Tabulates a window function so that it does not have to be evaluated for every block.
  *   size:   number of window points
  *   window: which window kernel to use (resolved through firdes_get_window_kernel())
  * Returns a malloc()-ed array of size floats for apply_precalculated_window_c(); the caller owns it.
  * Returns NULL if the allocation fails.
  */
 float *precalculate_window(int size, window_t window)
 {
 	float (*window_function)(float)=firdes_get_window_kernel(window);
 	float *windowt = malloc(sizeof(float) * size);
 	if(!windowt) return NULL; //do not write through a failed allocation
 	//A single-point window has no span: divide by 1 instead of (size-1)==0, which would make the only point NaN.
 	float span=(size>1)?(float)(size-1):1;
 	for(int i=0;i<size;i++) //@precalculate_window
 	{
 		float rate=(float)i/span;
 		windowt[i] = window_function(2.0*rate+1.0);
 	}
 	return windowt;
 }
 //Scales the I and Q component of each of the size complex input samples by the window table entry
 //with the same index (see precalculate_window()), and writes the result to output.
 void apply_precalculated_window_c(complexf* input, complexf* output, int size, float *windowt)
 {
 	for(int n=0;n<size;n++) //@apply_precalculated_window_c
 	{
 		const float weight=windowt[n];
 		iof(output,n)=weight*iof(input,n);
 		qof(output,n)=weight*qof(input,n);
 	}
 }
 void apply_window_f(float* input, float* output, int size, window_t window)
 {
 	float (*window_function)(float)=firdes_get_window_kernel(window);
@ -949,6 +1266,19 @@ void logpower_cf(complexf* input, float* output, int size, float add_db)
 	for(int i=0;i<size;i++) output[i]=10*output[i]+add_db; //@logpower_cf: pass 3
 }
 //Adds the power (I*I + Q*Q) of each of the size complex input samples to the matching element of output.
 //output is accumulated into, not overwritten, so the caller has to clear it before the first call.
 void accumulate_power_cf(complexf* input, float* output, int size)
 {
 	for(int n=0;n<size;n++) //@logpower_cf: pass 1
 	{
 		const float re=iof(input,n);
 		const float im=qof(input,n);
 		output[n] += re*re + im*im;
 	}
 }
 void log_ff(float* input, float* output, int size, float add_db) {
 	for(int i=0;i<size;i++) output[i]=log10(input[i]); //@logpower_cf: pass 2
 	for(int i=0;i<size;i++) output[i]=10*output[i]+add_db; //@logpower_cf: pass 3
 }
 /*
  _____        _                                            _
 |  __ \      | |                                          (_)
--- a/libcsdr.h
+++ b/libcsdr.h
@ -68,6 +68,8 @@ typedef struct complexf_s { float i; float q; } complexf;
 //they dropped M_PI in C99, so we define it:
 #define PI ((float)3.14159265358979323846)
 //Elapsed time in seconds (as a double) between two struct timespec values. The arguments are
 //parenthesized so that any expression can be passed, e.g. TIME_TAKEN(*pstart,*pend) or TIME_TAKEN(t[0],t[1]).
 #define TIME_TAKEN(start,end) (((end).tv_sec-(start).tv_sec)+((end).tv_nsec-(start).tv_nsec)/1e9)
 //window
 typedef enum window_s
 {
@ -140,17 +142,42 @@ typedef struct rational_resampler_ff_s
 rational_resampler_ff_t rational_resampler_ff(float *input, float *output, int input_size, int interpolation, int decimation, float *taps, int taps_length, int last_taps_delay);
 void rational_resampler_get_lowpass_f(float* output, int output_size, int interpolation, int decimation, window_t window);
 float *precalculate_window(int size, window_t window);
 void apply_window_c(complexf* input, complexf* output, int size, window_t window);
 void apply_precalculated_window_c(complexf* input, complexf* output, int size, float *windowt);
 void apply_window_f(float* input, float* output, int size, window_t window);
 void logpower_cf(complexf* input, float* output, int size, float add_db);
 void accumulate_power_cf(complexf* input, float* output, int size);
 void log_ff(float* input, float* output, int size, float add_db);
 //State of the fractional decimator; created by fractional_decimator_ff_init() and updated by every
 //call to fractional_decimator_ff().
 typedef struct fractional_decimator_ff_s
 {
 	float where; //position (in input samples, fractional) of the next output sample; advanced by rate for every output and rebased at the end of each call
 	int input_processed; //set at the end of each call; where is decreased by the same amount (presumably the number of input samples the caller can drop -- TODO confirm)
 	int output_size; //number of samples written to the output by the last call
 	int num_poly_points; //number of samples that the Lagrange interpolator will use
 	float* poly_precalc_denomiator; //we do not precalculate complete coefficients here as a Farrow structure would (this is a fractional interpolator), only the denominators of the interpolator expression
 	//float* last_inputs_circbuf; //circular buffer to store the last (num_poly_points) number of input samples.
 	//int last_inputs_startsat; //where the circular buffer starts now
 	//int last_inputs_samplewhere; 
 	float* coeffs_buf; //scratch buffer (num_poly_points floats) for the numerators of the interpolator expression
 	float* filtered_buf; //scratch buffer (num_poly_points floats) for the (optionally pre-filtered) samples being interpolated
 	int xifirst; //first interpolation node: -(num_poly_points/2)+1
 	int xilast; //last interpolation node: num_poly_points/2
 	float rate; //decimation rate; fractional_decimator_ff() asserts that it is > 1.0
 	float *taps; //coefficients of the pre-filter, NULL switches the pre-filter off
 	int taps_length; //number of coefficients in taps
 } fractional_decimator_ff_t;
 fractional_decimator_ff_t fractional_decimator_ff_init(float rate, int num_poly_points, float* taps, int taps_length);
 void fractional_decimator_ff(float* input, float* output, int input_size, fractional_decimator_ff_t* d);
 typedef struct old_fractional_decimator_ff_s
 {
 	float remain;
 	int input_processed;
 	int output_size;
-} fractional_decimator_ff_t;
+} old_fractional_decimator_ff_t;
-fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, fractional_decimator_ff_t d);
+old_fractional_decimator_ff_t old_fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, old_fractional_decimator_ff_t d);
 typedef struct shift_table_data_s
 {
@ -161,6 +188,25 @@ void shift_table_deinit(shift_table_data_t table_data);
 shift_table_data_t shift_table_init(int table_size);
 float shift_table_cc(complexf* input, complexf* output, int input_size, float rate, shift_table_data_t table_data, float starting_phase);
 //Data prepared by shift_addfast_init() for a given rate and passed by pointer to shift_addfast_cc().
 //NOTE(review): the meaning of the members is defined by the implementation in libcsdr.c (not visible here);
 //	judging by the names, dsin/dcos presumably hold four sine/cosine values each -- confirm there.
 typedef struct shift_addfast_data_s
 {
 	float dsin[4];
 	float dcos[4];
 	float phase_increment;
 } shift_addfast_data_t;
 shift_addfast_data_t shift_addfast_init(float rate); //was declared twice, the redundant prototype has been dropped
 float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_addfast_data_t* d, float starting_phase);
 //Data prepared by shift_unroll_init() for a given rate and size, and passed by pointer to shift_unroll_cc().
 //NOTE(review): the implementation is in libcsdr.c (not visible here); dsin/dcos are presumably tables
 //	holding size entries each, allocated by shift_unroll_init() -- confirm there, including who frees them.
 typedef struct shift_unroll_data_s
 {
 	float* dsin;
 	float* dcos;
 	float phase_increment;
 	int size;
 } shift_unroll_data_t;
 float shift_unroll_cc(complexf *input, complexf* output, int input_size, shift_unroll_data_t* d, float starting_phase);
 shift_unroll_data_t shift_unroll_init(float rate, int size);
 int log2n(int x);
 int next_pow2(int x);
--- a/libcsdr_wrapper.c
+++ b/libcsdr_wrapper.c
@ -1,4 +1,5 @@
 #include "libcsdr.c"
 #include "libcsdr_gpl.c"
 #include "ima_adpcm.c"
 #include "fastddc.c"
 //this wrapper helps parsevect.py to generate better output
--- a/2
+++ b/2
@ -0,0 +1,2 @@
 #!/bin/bash
 gcc test200.c --std=gnu99 -o test200 -DUSE_FFTW -DLIBCSDR_GPL -lcsdr
--- a/nmux-todo.md
+++ b/nmux-todo.md
@ -0,0 +1,7 @@
 Remove nmux repo, it will rather be part of csdr
 Try in OpenWebRX
 Add UDP support
 Evaluate performance against ncat
 Remove debug messages
 Document README.md
 Test with a limited number of people
--- a/nmux.cpp
+++ b/nmux.cpp
@ -0,0 +1,374 @@
 /*
 This software is part of libcsdr, a set of simple DSP routines for
 Software Defined Radio.
 Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the copyright holder nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 #include "nmux.h"
 char help_text[]="nmux is a TCP stream multiplexer. It reads data from the standard input, and sends it to each client connected through TCP sockets. Available command line options are:\n"
 "\t--port (-p), --address (-a): TCP port and address to listen.\n"
 "\t--bufsize (-b), --bufcnt (-n): Internal buffer size and count.\n"
 "\t--help (-h): Show this message.\n";
 int host_port = 0;
 char host_address[100] = "127.0.0.1";
 int thread_cntr = 0;
 //CLI parameters
 int bufsize = 1024; 
 int bufcnt = 1024;
 char** global_argv;
 int global_argc;
 tsmpool* pool;
 pthread_cond_t wait_condition;
 pthread_mutex_t wait_mutex;
 //Handler installed by main() through sigaction(): reports the signal number on stderr
 //and terminates the process with exit status 0.
 //NOTE(review): fprintf(), fflush() and exit() are not on the POSIX list of async-signal-safe
 //	functions; consider write() and _exit() if this ever misbehaves.
 void sig_handler(int signo)
 {
 	fprintf(stderr, MSG_START "signal %d caught, exiting...\n", signo);
 	fflush(stderr);
 	exit(0);
 }
 /*
  * nmux entry point.
  * Parses the command line (--port is mandatory), binds a listening TCP socket, then runs the main
  * loop forever: data read from the standard input is stored in the buffers of the global tsmpool,
  * and one client_thread is started for every accepted connection. Clients whose thread has finished
  * are cleaned up whenever a new connection is accepted.
  * The function does not return: the process ends through print_exit(), error_exit() or sig_handler().
  */
 int main(int argc, char* argv[])
 {
 	global_argv = argv;
 	global_argc = argc;
 	int c;
 	int no_options = 1;
 	for(;;)
 	{
 		int option_index = 0;
 		static struct option long_options[] = {
 		   {"port",       required_argument, 0,  'p' },
 		   {"address",    required_argument, 0,  'a' },
 		   {"bufsize", 	  required_argument, 0,  'b' },
 		   {"bufcnt", 	  required_argument, 0,  'n' },
 		   {"help", 	  no_argument, 		 0,  'h' },
 		   {0,			  0,                 0,  0   }
 		};
 		c = getopt_long(argc, argv, "p:a:b:n:h", long_options, &option_index);
 		if(c==-1) break;
 		no_options = 0;
 		switch (c)
 		{
 		case 'a':
 			host_address[100-1]=0; //strncpy() does not terminate the string if optarg is too long
 			strncpy(host_address,optarg,100-1);
 			break;
 		case 'p':
 			host_port=atoi(optarg);
 			break;
 		case 'b':
 			bufsize=atoi(optarg);
 			break;
 		case 'n':
 			bufcnt=atoi(optarg);
 			break;
 		case 'h':
 			print_exit(help_text);
 			break;
 		case 0:
 		case '?':
 		case ':':
 		default:
 			print_exit(MSG_START "error in getopt_long()\n");
 		}
 	}
 	if(no_options) print_exit(help_text);
 	if(!host_port) print_exit(MSG_START "missing required command line argument, --port.\n");
 	if(bufsize<=0) print_exit(MSG_START "invalid value for --bufsize (should be >0)\n");
 	if(bufcnt<=0) print_exit(MSG_START "invalid value for --bufcnt (should be >0)\n");
 	//set signals
 	//	(SIGKILL is not in the list: it cannot be caught, sigaction() always fails for it)
 	struct sigaction sa;
 	memset(&sa, 0, sizeof(sa));
 	sa.sa_handler = sig_handler;
 	sigaction(SIGTERM, &sa, NULL);
 	sigaction(SIGQUIT, &sa, NULL);
 	sigaction(SIGINT, &sa, NULL);
 	sigaction(SIGHUP, &sa, NULL);
 	struct sockaddr_in addr_host;
 	int listen_socket;
 	std::vector<client_t*> clients;
 	clients.reserve(100);
 	listen_socket=socket(AF_INET,SOCK_STREAM,0);
 	if(listen_socket == -1)
 		error_exit(MSG_START "cannot create socket");
 	int sockopt = 1;
 	if( setsockopt(listen_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&sockopt, sizeof(sockopt)) == -1 )
 		error_exit(MSG_START "cannot set SO_REUSEADDR");  //the best description on SO_REUSEADDR ever: http://stackoverflow.com/a/14388707/3182453
 	memset(&addr_host,0,sizeof(addr_host)); //zero bytes, not the character '0'
 	addr_host.sin_family = AF_INET;
 	addr_host.sin_port = htons(host_port);
 	addr_host.sin_addr.s_addr = INADDR_ANY;
 	if( (addr_host.sin_addr.s_addr=inet_addr(host_address)) == INADDR_NONE )
 		error_exit(MSG_START "invalid host address");
 	if( bind(listen_socket, (struct sockaddr*) &addr_host, sizeof(addr_host)) < 0 )
 		error_exit(MSG_START "cannot bind() address to the socket");
 	if( listen(listen_socket, 10) == -1 )
 		error_exit(MSG_START "cannot listen() on socket");
 	fprintf(stderr, MSG_START "listening on %s:%d\n", inet_ntoa(addr_host.sin_addr), host_port);
 	struct sockaddr_in addr_cli;
 	socklen_t addr_cli_len = sizeof(addr_cli);
 	int new_socket;
 	int highfd = 0; //maxfd() keeps this at (highest descriptor + 1), which is what select() expects
 	maxfd(&highfd, listen_socket);
 	maxfd(&highfd, STDIN_FILENO);
 	fd_set select_fds;
 	//Set stdin and listen_socket to non-blocking
 	if(set_nonblocking(STDIN_FILENO) || set_nonblocking(listen_socket))
 		error_exit(MSG_START "cannot set_nonblocking()");
 	//Create tsmpool
 	pool = new tsmpool(bufsize, bufcnt);
 	if(!pool->is_ok()) print_exit(MSG_START "tsmpool failed to initialize\n");
 	unsigned char* current_write_buffer = (unsigned char*)pool->get_write_buffer();
 	int index_in_current_write_buffer = 0;
 	//Create wait condition: client threads waiting for input data from the main thread will be
 	//	waiting on this condition. They will be woken up with pthread_cond_broadcast() if new
 	//	data arrives.
 	if(pthread_cond_init(&wait_condition, NULL))
 		print_exit(MSG_START "pthread_cond_init failed"); //cond_attrs is ignored by Linux
 	if(pthread_mutex_init(&wait_mutex, NULL))
 		print_exit(MSG_START "pthread_mutex_t failed"); //cond_attrs is ignored by Linux
 	for(;;)
 	{
 		FD_ZERO(&select_fds);
 		FD_SET(listen_socket, &select_fds);
 		FD_SET(STDIN_FILENO, &select_fds);
 		addr_cli_len = sizeof(addr_cli); //accept() overwrites it, so it is reset for every round
 		if(NMUX_DEBUG) fprintf(stderr, "mainfor: selecting...");
 		//Let's wait until there is any new data to read, or any new connection!
 		int select_ret = select(highfd, &select_fds, NULL, NULL, NULL);
 		if(NMUX_DEBUG) fprintf(stderr, "selected.\n");
 		if(select_ret == -1) error_exit("mainfor select() error");
 		//Is there a new client connection?
 		if( FD_ISSET(listen_socket, &select_fds) && ((new_socket = accept(listen_socket, (struct sockaddr*)&addr_cli, &addr_cli_len)) != -1) )
 		{
 			if(NMUX_DEBUG)
 			{
 				fprintf(stderr, "\x1b[1m\x1b[33mmainfor: clients before closing: ");
 				//the pointer is narrowed explicitly, as %x takes an unsigned int
 				for(int i=0;i<(int)clients.size();i++) fprintf(stderr, "0x%x ", (unsigned int)(intptr_t)clients[i]);
 				fprintf(stderr, "\x1b[0m\n");
 			}
 			if(NMUX_DEBUG) fprintf(stderr, "mainfor: accepted (socket = %d).\n", new_socket);
 			//Close all finished clients
 			for(int i=0;i<(int)clients.size();i++)
 			{
 				if(clients[i]->status == CS_THREAD_FINISHED)
 				{
 					if(NMUX_DEBUG) fprintf(stderr, "mainfor: client removed: %d\n", i);
 					//client destructor: release everything that belongs to the client, otherwise
 					//	every disconnection would leak a thread, a socket and a client_t
 					client_t* finished_client = clients[i];
 					pthread_join(finished_client->thread, NULL); //the thread sets the status right before it exits, so this returns promptly
 					close(finished_client->socket);
 					pool->remove_thread(finished_client->tsmthread);
 					delete finished_client;
 					clients.erase(clients.begin()+i);
 					i--;
 				}
 			}
 			if(NMUX_DEBUG)
 			{
 				fprintf(stderr, "\x1b[1m\x1b[33mmainfor: clients after closing: ");
 				for(int i=0;i<(int)clients.size();i++) fprintf(stderr, "0x%x ", (unsigned int)(intptr_t)clients[i]);
 				fprintf(stderr, "\x1b[0m\n");
 			}
 			//We're the parent, let's create a new client and initialize it
 			client_t* new_client = new client_t;
 			new_client->error = 0;
 			memcpy(&new_client->addr, &addr_cli, sizeof(struct sockaddr_in));
 			new_client->socket = new_socket;
 			new_client->status = CS_CREATED;
 			new_client->tsmthread = pool->register_thread();
 			new_client->lpool = pool;
 			new_client->sleeping = 0;
 			if(pthread_create(&new_client->thread, NULL, client_thread, (void*)new_client)==0)
 			{
 				clients.push_back(new_client);
 				fprintf(stderr, MSG_START "pthread_create() done, clients now: %d\n", (int)clients.size());
 			}
 			else
 			{
 				fprintf(stderr, MSG_START "pthread_create() failed.\n");
 				close(new_socket); //nobody is going to serve this connection
 				pool->remove_thread(new_client->tsmthread);
 				delete new_client;
 			}
 		}
 		if( FD_ISSET(STDIN_FILENO, &select_fds) )
 		{
 			if(index_in_current_write_buffer >= bufsize)
 			{
 				if(NMUX_DEBUG) fprintf(stderr, "mainfor: gwbing...");
 				current_write_buffer = (unsigned char*)pool->get_write_buffer();
 				if(NMUX_DEBUG) fprintf(stderr, "gwbed.\nmainfor: cond broadcasting...");
 				pthread_mutex_lock(&wait_mutex);
 				pthread_cond_broadcast(&wait_condition);
 				pthread_mutex_unlock(&wait_mutex);
 				if(NMUX_DEBUG) fprintf(stderr, "cond broadcasted.\n");
 					//Shouldn't we do it after we put data in?
 					//	No, on get_write_buffer() actually the previous buffer is getting available
 					//	for read for threads that wait for new data (wait on global pthead mutex
 					//	wait_condition).
 				index_in_current_write_buffer = 0;
 			}
 			if(NMUX_DEBUG) fprintf(stderr, "mainfor: reading...\n");
 			int read_ret = read(STDIN_FILENO, current_write_buffer + index_in_current_write_buffer, bufsize - index_in_current_write_buffer);
 			if(NMUX_DEBUG) fprintf(stderr, "read %d\n", read_ret);
 			if(read_ret>0)
 			{
 				index_in_current_write_buffer += read_ret;
 			}
 			else if(read_ret==0)
 			{
 				//End of input stream, close clients and exit
 				print_exit(MSG_START "(main thread/for) end input stream, exiting.\n");
 			}
 			else if(read_ret==-1)
 			{
 				if(errno == EAGAIN) { if(NMUX_DEBUG) fprintf(stderr, "mainfor: read EAGAIN\n"); /* seems like select would block forever, so we just read again */ }
 				else error_exit(MSG_START "(main thread/for) error in read(), exiting.\n");
 			}
 		}
 	}
 }
 /*
  * Thread function serving one connected client; started by main() through pthread_create().
  *   param: the client_t* that describes the client
  * It takes buffers from the tsmpool one after another and sends their content to the client socket.
  * When poll() or send() fails, it sets this_client->status to CS_THREAD_FINISHED and exits, so that
  * main() can remove the client from its list.
  * Always returns NULL.
  * NOTE(review): the socket is not closed in this function.
  * NOTE(review): the "0x%x" conversions below are passed an intptr_t, which is wider than the
  *	unsigned int that %x takes on 64-bit platforms.
  */
 void* client_thread (void* param)
 {
 	fprintf(stderr, "client 0x%x: started!\n", (intptr_t)param);
 	client_t* this_client = (client_t*)param;
 	this_client->status = CS_THREAD_RUNNING;
 	int retval;
 	tsmpool* lpool = this_client->lpool;
 	if(NMUX_DEBUG) fprintf(stderr, "client 0x%x: socket = %d!\n", (intptr_t)param, this_client->socket);
 	if(NMUX_DEBUG) fprintf(stderr, "client 0x%x: poll init...", (intptr_t)param);
 	struct pollfd pollfds[1];
 	pollfds[0].fd = this_client->socket;
 	pollfds[0].events = POLLOUT;
 	pollfds[0].revents = 0;
 	if(NMUX_DEBUG) fprintf(stderr, "client poll inited.\n");
 	//Set this_client->socket to non-blocking
 	//	(note that error_exit() terminates the whole process, not only this thread)
 	if(set_nonblocking(this_client->socket))
 		error_exit(MSG_START "cannot set_nonblocking() on this_client->socket");
 	int client_buffer_index = 0; //number of bytes already sent from pool_read_buffer
 	int client_goto_source = 0; //tells in the final message which failure ended the thread: 1 = poll(), 2 = send()
 	char* pool_read_buffer = NULL; //the pool buffer that is currently being sent
 	for(;;)
 	{
 		//Wait until there is any data to send.
 		//  If I haven't sent all the data from my last buffer, don't wait.
 		//	(Wait for the server process to wake me up.)
 		while(!pool_read_buffer || client_buffer_index >= lpool->size)
 		{
 			if(NMUX_DEBUG) fprintf(stderr, "client 0x%x: trying to grb\n", (intptr_t)param);
 			pool_read_buffer = (char*)lpool->get_read_buffer(this_client->tsmthread);
 			if(pool_read_buffer) { client_buffer_index = 0; break; }
 			if(NMUX_DEBUG) fprintf(stderr, "client 0x%x: cond_waiting for more data\n", (intptr_t)param);
 			//NOTE(review): the pool is checked above without holding wait_mutex, so a broadcast sent by main()
 			//	between that check and pthread_cond_wait() is presumably missed until the next one -- confirm.
 			pthread_mutex_lock(&wait_mutex);
 			this_client->sleeping = 1;
 			pthread_cond_wait(&wait_condition, &wait_mutex);
 			pthread_mutex_unlock(&wait_mutex);
 		}
 		//Wait for the socket to be available for write.
 		if(NMUX_DEBUG) fprintf(stderr, "client 0x%x: polling for socket write...", (intptr_t)param);
 		int ret = poll(pollfds, 1, -1);
 		if(NMUX_DEBUG) fprintf(stderr, "client polled for socket write.\n");
 		if(ret == 0) continue;
 		else if (ret == -1) { client_goto_source = 1; goto client_thread_exit; }
 		//Read data from global tsmpool and write it to client socket
 		if(NMUX_DEBUG) fprintf(stderr, "client 0x%x: sending...", (intptr_t)param);
 		ret = send(this_client->socket, pool_read_buffer + client_buffer_index, lpool->size - client_buffer_index, MSG_NOSIGNAL);
 		if(NMUX_DEBUG) fprintf(stderr, "client sent.\n");
 		if(ret == -1) 
 		{
 			switch(errno)
 			{
 				case EAGAIN: break; //the socket is non-blocking: nothing was sent this time, try again in the next round
 				default: client_goto_source = 2; goto client_thread_exit;
 			}
 		}
 		else client_buffer_index += ret; //a partial send is continued from here in the next round
 	}
 client_thread_exit:
 	fprintf(stderr, "client 0x%x: CS_THREAD_FINISHED, client_goto_source = %d, errno = %d", (intptr_t)param, client_goto_source, errno);
 	this_client->status = CS_THREAD_FINISHED; //main() looks for this value when it removes finished clients
 	pthread_exit(NULL);
 	return NULL;
 }
 //Adds O_NONBLOCK to the file status flags of fd.
 //Returns 0 on success, 1 if either of the two fcntl() calls fails.
 int set_nonblocking(int fd)
 {
 	const int current_flags = fcntl(fd, F_GETFL);
 	if(current_flags == -1) return 1;
 	if(fcntl(fd, F_SETFL, current_flags|O_NONBLOCK) == -1) return 1;
 	return 0;
 }
 //Prints why through perror() (which appends the description of the current errno),
 //then terminates the process with exit status 1. To be used after a failed system call.
 void error_exit(const char* why)
 {
 	perror(why); //do we need a \n at the end of (why)?
 	exit(1);
 }
 //Prints why to stderr as it is (no newline is added), then terminates the process with exit status 1.
 void print_exit(const char* why)
 {
 	fprintf(stderr, "%s", why);
 	exit(1);
 }
 //Keeps *maxfd at least one above every descriptor passed in, so that it can be used
 //directly as the first argument of select().
 void maxfd(int* maxfd, int fd)
 {
 	const int needed = fd+1;
 	if(*maxfd < needed) *maxfd = needed;
 }
--- a/nmux.h
+++ b/nmux.h
@ -0,0 +1,45 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <getopt.h>
 #include <signal.h>
 #include <string.h>
 #include <unistd.h>
 #include <pthread.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <poll.h>
 #include <arpa/inet.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include "tsmpool.h"
 #define MSG_START "nmux: "
 #define NMUX_DEBUG 0
 //Life cycle of a client, stored in client_t.status.
 typedef enum client_status_e
 {
 	CS_CREATED, //set by main() before the thread of the client is started
 	CS_THREAD_RUNNING, //set by client_thread() when it starts
 	CS_THREAD_FINISHED //set by client_thread() right before it exits; main() removes clients in this state
 } client_status_t;
 //Everything the main thread and a client thread share about one connected client.
 //Created and filled in by main(), then passed to client_thread() as its parameter.
 typedef struct client_s
 {
 	struct sockaddr_in addr; //address of the peer, as returned by accept()
 	int socket; //the connected socket returned by accept()
 	int error; //set to non-zero on error (data transfer failed)
 	pthread_t thread; //the thread running client_thread() for this client
    tsmthread_t* tsmthread; //reader registration in the tsmpool (holds the read position of this client)
 	client_status_t status; //written by the client thread and read by main() (no locking around it)
    //the following members are there to give access to some global variables inside the thread:
    tsmpool* lpool; 
    int sleeping; //set to 1 by client_thread() before it waits for new data
 } client_t;
 void print_exit(const char* why);
 void sig_handler(int signo);
 void* client_thread (void* param);
 void error_exit(const char* why);
 void maxfd(int* maxfd, int fd);
 int set_nonblocking(int fd);
--- a/test200.c
+++ b/test200.c
@ -0,0 +1,124 @@
 /*
 This software is part of libcsdr, a set of simple DSP routines for 
 Software Defined Radio.
 Copyright (c) 2014-2015, Andras Retzler <randras@sdr.hu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the copyright holder nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <time.h>
 #include "libcsdr.h"
 #include "libcsdr_gpl.h"
 #define T_BUFSIZE (1024*1024/4)
 #define T_N (200)
 #define T_TAPS (1023)
 #define T_DECFACT (200)
 /*
  * Benchmark: fills a buffer with random complex samples, then measures how long T_N runs of
  * fir_decimate_cc and of the different shift_*_cc implementations take on it.
  * The results are printed to stderr. Returns 0 on success, 1 if the test data cannot be prepared.
  */
 int main()
 {
 	fprintf(stderr,"Getting a %d of random samples...\n", T_BUFSIZE);
 	const size_t random_bytes_needed = sizeof(unsigned char)*T_BUFSIZE*2; //one byte for I and one for Q per sample
 	unsigned char* buf_u8 = (unsigned char*)malloc(random_bytes_needed);
 	complexf* buf_c = (complexf*)malloc(sizeof(complexf)*T_BUFSIZE);
 	complexf* outbuf_c = (complexf*)malloc(sizeof(complexf)*T_BUFSIZE);
 	float* taps_f = (float*)malloc(sizeof(float)*T_TAPS);
 	if(!buf_u8 || !buf_c || !outbuf_c || !taps_f)
 	{
 		fprintf(stderr,"test200: cannot allocate the buffers\n");
 		return 1;
 	}
 	int urand_fp = open("/dev/urandom",O_RDONLY); //we only read from it
 	if(urand_fp < 0)
 	{
 		perror("test200: cannot open /dev/urandom");
 		return 1;
 	}
 	//Fill the whole byte buffer: the conversion loop below consumes all of its 2*T_BUFSIZE bytes.
 	//read() is allowed to return less than requested, so we repeat it until the buffer is full.
 	size_t random_bytes_read = 0;
 	while(random_bytes_read < random_bytes_needed)
 	{
 		ssize_t read_ret = read(urand_fp, buf_u8+random_bytes_read, random_bytes_needed-random_bytes_read);
 		if(read_ret <= 0)
 		{
 			perror("test200: cannot read /dev/urandom");
 			return 1;
 		}
 		random_bytes_read += read_ret;
 	}
 	close(urand_fp);
 	for(int i=0;i<T_BUFSIZE;i++)
 	{
 		iof(buf_c,i)=buf_u8[2*i]/128.0;
 		qof(buf_c,i)=buf_u8[2*i+1]/128.0;
 	}
 	firdes_lowpass_f(taps_f, T_TAPS, 1.0f/T_DECFACT, WINDOW_DEFAULT);
 	struct timespec start_time, end_time;
 	fprintf(stderr,"Starting tests of processing %d samples...\n", T_BUFSIZE*T_N);
 	//fir_decimate_cc
 	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
 	for(int i=0;i<T_N;i++) fir_decimate_cc(buf_c, outbuf_c, T_BUFSIZE, 10, taps_f, T_TAPS);
 	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
 	fprintf(stderr,"fir_decimate_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
 	//shift_math_cc
 	float starting_phase = 0;
 	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
 	for(int i=0;i<T_N;i++) starting_phase = shift_math_cc(buf_c, outbuf_c, T_BUFSIZE, 0.1, starting_phase);
 	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
 	fprintf(stderr,"shift_math_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
 	//shift_table_cc
 	shift_table_data_t shift_table_data=shift_table_init(65536);
 	starting_phase = 0;
 	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
 	for(int i=0;i<T_N;i++) starting_phase = shift_table_cc(buf_c, outbuf_c, T_BUFSIZE, 0.1, shift_table_data, starting_phase);
 	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
 	fprintf(stderr,"shift_table_cc (table size = %d) done in %g seconds.\n",65536,TIME_TAKEN(start_time,end_time));
 	//shift_addition_cc
 	shift_addition_data_t data_addition = shift_addition_init(0.1);
 	starting_phase = 0;
 	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
 	for(int i=0;i<T_N;i++) starting_phase = shift_addition_cc(buf_c, outbuf_c, T_BUFSIZE, data_addition, starting_phase);
 	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
 	fprintf(stderr,"shift_addition_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
 	//shift_addfast_cc
 	shift_addfast_data_t data_addfast = shift_addfast_init(0.1);
 	starting_phase = 0;
 	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
 	for(int i=0;i<T_N;i++) starting_phase = shift_addfast_cc(buf_c, outbuf_c, T_BUFSIZE, &data_addfast, starting_phase);
 	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
 	fprintf(stderr,"shift_addfast_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
 	//shift_unroll_cc
 	shift_unroll_data_t data_unroll = shift_unroll_init(0.1, T_BUFSIZE);
 	starting_phase = 0;
 	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
 	for(int i=0;i<T_N;i++) starting_phase = shift_unroll_cc(buf_c, outbuf_c, T_BUFSIZE, &data_unroll, starting_phase);
 	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
 	fprintf(stderr,"shift_unroll_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
 	free(taps_f);
 	free(outbuf_c);
 	free(buf_c);
 	free(buf_u8);
 	return 0;
 }
--- a/tsmpool.cpp
+++ b/tsmpool.cpp
@ -0,0 +1,74 @@
 #include "tsmpool.h"
 //Creates a pool of num buffers, size bytes each.
 //A failure is reported through is_ok() instead of an exception (see the comment on ok in tsmpool.h).
 //The indexes are set in the constructor body on purpose: index_before() reads num, so num
 //has to be initialized before my_read_index is calculated.
 tsmpool::tsmpool(size_t size, int num) :
 	size(size), 
 	num(num) //number of buffers of (size) to alloc
 {
 	this->threads_cntr = 0;
 	this->ok = 1;
 	this->lowest_read_index = -1;
 	this->write_index = 0;
 	this->my_read_index = index_before(0);
    if (pthread_mutex_init(&this->mutex, NULL) != 0) { this->ok = 0; return; }
 	for(int i=0; i<num; i++) 
 	{
 		//NOTE(review): a plain new[] reports failure by throwing std::bad_alloc instead of returning NULL,
 		//	so the check below is presumably never true -- confirm, or switch to new(std::nothrow).
 		void* newptr = (void*)new char[size];
 		if(!newptr) { this->ok = 0; return; }
 		buffers.push_back(newptr);
 	}
 }
 int tsmpool::is_ok() { return this->ok; }
 void* tsmpool::get_write_buffer()
 {
 	//if(write_index==index_before(lowest_read_index)) return NULL;
 	pthread_mutex_lock(&this->mutex);
 	void* to_return = buffers[write_index];
 	write_index = index_next(write_index);
 	pthread_mutex_unlock(&this->mutex);
 	if(TSM_DEBUG) fprintf(stderr, "gwb: write_index = %d\n", write_index);
 	return to_return;
 }
 //Registers a new reader. Its read position is set to the buffer just before write_index, which is
 //the position where get_read_buffer() reports that there is nothing to read yet.
 //Returns the reader (to be released with remove_thread()), or NULL if the pool is not ok.
 tsmthread_t* tsmpool::register_thread()
 {
 	if(!ok) return NULL;
 	pthread_mutex_lock(&mutex);
 	tsmthread_t* reader = new tsmthread_t();
 	reader->read_index = index_before(write_index);
 	threads.push_back(reader);
 	pthread_mutex_unlock(&mutex);
 	return reader;
 }
 //Unregisters and deletes a reader that was created by register_thread().
 //Returns 1 if the reader was found and removed, 0 if it is not registered in this pool.
 //(The function is declared to return int, so every path has to return a value:
 //	flowing off the end of a non-void function is undefined behavior in C++.)
 int tsmpool::remove_thread(tsmthread_t* thread)
 {
 	int removed = 0;
 	pthread_mutex_lock(&this->mutex);
 	for(size_t i=0;i<threads.size();i++)
 		if(threads[i] == thread)
 		{
 			delete threads[i];
 			threads.erase(threads.begin()+i);
 			removed = 1;
 			break;
 		}
 	pthread_mutex_unlock(&this->mutex);
 	return removed;
 }
 //Hands out the next buffer to a reader and advances its read position.
 //  thread: the reader returned by register_thread(), or NULL to use the built-in read position
 //          of the pool (single writer - single reader usage)
 //Returns NULL if the read position is just before write_index, that is, there is nothing to read yet.
 void* tsmpool::get_read_buffer(tsmthread_t* thread)
 {
 	pthread_mutex_lock(&mutex);
 	int& cursor = thread ? thread->read_index : my_read_index;
 	if(cursor==index_before(write_index))
 	{
 		if(TSM_DEBUG) fprintf(stderr, "grb: fail,"
 			"read_index %d is just before write_index\n", cursor);
 		pthread_mutex_unlock(&mutex);
 		return NULL;
 	}
 	void* handed_out = buffers[cursor];
 	cursor=index_next(cursor);
 	pthread_mutex_unlock(&mutex);
 	if(TSM_DEBUG) fprintf(stderr, "grb: read_index = %d\n", cursor);
 	return handed_out;
 }
--- a/tsmpool.h
+++ b/tsmpool.h
@ -0,0 +1,43 @@
 //tsmpool stands for Thread-Safe Memory Pool.
 //It implements a big circular buffer that one thread writes into, and multiple threads read from.
 //The reader threads have lower priority than the writer thread (they can be left behind if they don't read fast enough).
 #include <vector>
 #include <pthread.h>
 #define TSM_DEBUG 0
 #include <stdio.h>
 using namespace std;
 //One registered reader of a tsmpool; created by tsmpool::register_thread() and deleted by tsmpool::remove_thread().
 typedef struct tsmthread_s
 {
 	int read_index; //it always points to the next buffer to be read
 } tsmthread_t;
 //Circular pool of num buffers (size bytes each) with one writer and any number of registered readers.
 //Every reader has its own read position; all the positions are protected by one mutex.
 //NOTE(review): no destructor is declared here, so the buffers and the mutex are presumably never released -- confirm.
 class tsmpool
 {
 private:
 	vector<tsmthread_t*> threads; //the registered readers
 	vector<void*> buffers; //the num buffers of the pool
 	int threads_cntr;
 	pthread_mutex_t mutex; //protects write_index, the read positions and the list of readers
 	int ok; //tsmpool is expected to be included in C-style programs. 
 			//	If something fails in the constructor, it will be seen here instead of a try{}catch{}
 	int write_index; //it always points to the next buffer to be written
 	int lowest_read_index; //unused
 	int my_read_index; //it is used when tsmpool is used as a single writer - single reader circular buffer
 public:
 	const size_t size; //size of one buffer in bytes
 	const int num; //number of buffers
 	int is_ok(); //non-zero if the constructor succeeded
 	tsmpool(size_t size, int num);
 	void* get_write_buffer(); //returns the buffer at write_index and advances write_index
 	tsmthread_t* register_thread(); //adds a reader; returns NULL if the pool is not ok
 	int remove_thread(tsmthread_t* thread); //removes and deletes a reader
 	void* get_read_buffer(tsmthread_t* thread); //next buffer for the reader (NULL thread: my_read_index is used), or NULL if there is no new data
 	int index_next(int index) { return (index+1==num)?0:index+1; } //index of the following buffer, wrapping around at num
 	int index_before(int index) { return (index-1<0)?num-1:index-1; } //index of the preceding buffer, wrapping around at 0
 };
		`@ -0,0 +1,2 @@`
							`#!/bin/bash`
							`gcc test200.c --std=gnu99 -o test200 -DUSE_FFTW -DLIBCSDR_GPL -lcsdr`