/**
 *	This defines a function and some handy macros for time-delays
 *	The main macros take arguments in cycles, not time, but on a PIC
 *	with a 4MHz clock, one cycle nicely corresponds to one microsecond.
 *
 *	It covers the range from 13 cycles to 190,000 cycles with
 *	rather good accuracy, perhaps within 5% or so.
 *
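 *	There are also macros that take a delay in microseconds and
 *	convert it to cycles using KHZ, the clock speed in kHz (it
 *	defaults to 4000 when not defined elsewhere).  They expand to
 *	the cycle macros, so their maximum delay depends on the clock: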
 *		Clock	Small Max	Big Max
 *		20MHz	150us		38000us
 *		10MHz	300us		76000us
 *		4MHz	750us		190000us
 *		1MHz	3000us		760000us
 *		100KHz	30000us		7600000us
 *		48KHz	62500us		15833333us
 *		32KHz	93750us		23750000us
 *
 *	If you exceed these, your compiler will likely warn you of an 
 *	overflow.
 *
 *	Also note that, while the internal 4MHz clock is decently 
 *	accurate, the internal 48Khz oscillator is very rough.  :)
 *	Had to dig out a 32.768KHz crystal oscillator to verify this 
 *	worked at low speeds...
 */
#define __16f628a
#include "pic/pic16f628a.h"
#include "tsmtypes.h"

// Set the __CONFIG word: a 16-bit value pinned to address 0x2007 with the
// compiler's "at" absolute-address qualifier.
// CONFIG_WORD is not defined in this file; presumably it comes from one of
// the included headers or from the build command line -- confirm.
Uint16 at 0x2007  __CONFIG = CONFIG_WORD;

// Clock speed in kHz, used by the DELAY_*_US macros to turn microseconds
// into cycles.  Falls back to 4000 (a 4MHz clock) unless the build has
// already supplied a value.
#if !defined(KHZ)
#	define KHZ 4000L
#endif

// Pin assignments.  Each *_PORT constant is a bit index and the matching
// *_BIT constant is the single-bit mask built from it; main() uses the
// masks with TRISB and PORTB.
enum
{
	CLK_PORT = 0,
	CLK_BIT  = (1 << CLK_PORT),

	RX_PORT  = 1,
	RX_BIT   = (1 << RX_PORT),

	TX_PORT  = 2,
	TX_BIT   = (1 << TX_PORT),

	DAT_PORT = 3,
	DAT_BIT  = (1 << DAT_PORT),

	// Index 4 is not assigned.
	FIN_PORT = 5,
	FIN_BIT  = (1 << FIN_PORT)
};

/**
 * Busy-waits for a number of cycles determined by the globals
 * loop_b (count of long outer passes) and loop_c (count of short
 * tail passes).  loop_x is only a scratch counter: the function
 * reloads it with 191 on every outer pass, so a value stored in it
 * by the caller has no effect.
 *
 * Takes no arguments and returns nothing.  You'll probably find it
 * way easier to use the CYCLES_* and DELAY_*_US macros, which fill
 * in the counters for you.
 */
void cycle_eater(void);

// Counters shared between the C macros and the inline assembly.
// We're using precisely timed inline ASM that selects a bank once
// (via loop_x in cycle_eater, via loop_c in CYCLES_SMALL) and then
// touches the other counters without selecting again.
// Therefore these variables must all be in the same bank, or
// accessible from all banks.  They are pinned to the consecutive
// addresses 0x20..0x22 to guarantee that.
volatile Uint8 at 0x20 loop_x;	// inner-loop scratch, reloaded by cycle_eater
volatile Uint8 at 0x21 loop_b;	// number of outer ("big") passes
volatile Uint8 at 0x22 loop_c;	// number of tail ("correction") passes

// This is how long cycle_eater takes, in cycles, for values of X, B, and C:
//   X - inner-loop count (cycle_eater always uses 191)
//   B - outer-loop count (loop_b)
//   C - tail-loop count  (loop_c)
// Every argument is parenthesized so that expressions such as
// LOOP_CYCLES(n+1, b, c) are evaluated as intended.
#define LOOP_CYCLES(X, B, C)	(((X)*(B)*4)+((C)*3)+13)

/**
 *	Use this macro for delays over 750 cycles and under
 *	190,000 cycles.
 *
 *	X is the delay in cycles.  After subtracting a fixed 16-cycle
 *	overhead, the macro splits the remainder into:
 *	  loop_b = number of whole 764-cycle outer passes
 *	  loop_c = leftover cycles / 3, plus 1 (so it is never 0)
 *	and then calls cycle_eater().  Both counters are stored in Uint8
 *	variables, so the quotient has to fit in 8 bits.
 *
 *	NOTE(review): for X below 780 the quotient stored in loop_b is 0.
 *	cycle_eater tests loop_b with decfsz, which decrements before
 *	testing, so a 0 would presumably wrap to 255 and give a very long
 *	delay -- confirm, and prefer CYCLES_SMALL for such values.
 *	X below 16 makes the unsigned subtraction wrap around.
 *
 *	X is expanded three times; pass a constant or a side-effect-free
 *	expression.
 */
#define CYCLES_BIG(X)	do {					\
		loop_b=((X)-16LU)/764LU;			\
		loop_c=((((X)-16LU)%764LU)/3LU)+1;		\
		cycle_eater();					\
	} while(0)

/**
 *	Use this macro for delays under 750 cycles and over 13 cycles.
 *	It uses an assembly trick to jump direct to a branch halfway
 *	through the cycle eater function.
 *
 *	X is the delay in cycles.  After subtracting a fixed 11-cycle
 *	overhead, the remainder divided by 3 is stored in loop_c.  The
 *	macro then selects the bank of loop_c and CALLs the "correction"
 *	label inside cycle_eater(), so only the short tail loop runs and
 *	loop_b / loop_x are not touched.
 *
 *	NOTE(review): X of 13 or less gives loop_c == 0 (or wraps the
 *	unsigned subtraction below 11); the tail loop uses decfsz, so a 0
 *	count would presumably wrap to 255 passes -- confirm the real
 *	lower bound.  loop_c is a Uint8, so the count must fit in 8 bits.
 *
 *	The CALL depends on the label name "correction" and on
 *	cycle_eater() returning right after that loop.
 */
#define CYCLES_SMALL(X) do {					\
		loop_c=((X)-11LU)/3LU;				\
		_asm	BANKSEL	_loop_c	_endasm;		\
		_asm	CALL	correction	_endasm;	\
	} while(0)


/**
 *	Calculates cycles from microseconds based on clock speed relative
 *	to 4MHz.
 *
 *	X is a delay in microseconds.  It is multiplied by KHZ and divided
 *	by 4000 (integer division, so the result truncates) to get a cycle
 *	count; with KHZ == 4000 the count equals X.  The count is passed to
 *	CYCLES_SMALL or CYCLES_BIG respectively, so it must fall inside
 *	the range that macro supports.
 */
#define DELAY_SMALL_US(X)	CYCLES_SMALL( ((X)*(KHZ))/4000LU )
#define DELAY_BIG_US(X)		CYCLES_BIG( ((X)*(KHZ))/4000LU	)

/**
 * Program entry point.  Sets up the ports once, then toggles the FIN
 * pin on PORTB forever, waiting 10ms after every change.  Never returns.
 */
void main(void)
{
#if defined(__16f628a)
	/*
	 * Disable comparators.  NEEDED FOR NORMAL PORTA ON pic16f628a!
	 */
	CMCON = 0x07;
#endif
	NOT_RBPU = 0;					/* Enable pullups */

	TRISA = 0xff;					/* PORTA all input */
	TRISB = CLK_BIT | RX_BIT | TX_BIT | DAT_BIT;	/* Setup I/O on port B */
	PORTB = 0;

	/*
	 * If you're using the internal clock, you can enable the line
	 * below to run at 48Khz.  Be sure to change the value of KHZ to
	 * match.
	 */
//	OSCF=0;

	for (;;) {
		PORTB = FIN_BIT;		/* Light port B */
		DELAY_BIG_US(10000LU);		/* Wait 10ms */

		PORTB = 0;			/* Clear port B */
		DELAY_BIG_US(10000LU);		/* Wait 10ms */
	}
}

/**
 * Busy-wait delay written as cycle-counted inline assembly.
 *
 * Inputs (globals): loop_b = number of outer passes, loop_c = number of
 * tail passes.  loop_x is reloaded with 191 at the top of every outer
 * pass, so whatever the caller left in it is overwritten.
 *
 * Layout:
 *   delay_big / delay_inner  - outer loop run loop_b times; each pass
 *                              spins the inner loop 191 times.
 *   correction               - tail loop run loop_c times for the
 *                              remainder.  This label is also a second
 *                              entry point: CYCLES_SMALL CALLs it
 *                              directly, relying on the return that
 *                              follows the asm block.
 *
 * Both loop counters are tested with decfsz (decrement, skip the next
 * instruction when the result is zero), so a counter of 0 on entry is
 * decremented before it is tested.
 *
 * NOTE(review): counting instructions, one outer pass looks like
 * 2 + (191*4 - 1) + 3 = 768 cycles rather than the 764 the comment
 * below and CYCLES_BIG assume -- confirm against hardware before
 * changing any constant.  Do not reorder or add instructions here
 * without recalibrating the macros.
 */
void cycle_eater(void)
{
	_asm	banksel	_loop_x	// select the bank that holds all three counters
		nop
delay_big:	movlw	191		// Calibrated for b*764 cycles
		movwf	_loop_x	// Load W into reg
delay_inner:	nop	// To make the inner loop take 4 cycles per pass
		decfsz	_loop_x, 1	// inner count down, falls through at zero
		goto	delay_inner
		decfsz	_loop_b,1	// outer count down, falls through at zero
		goto	delay_big
correction:	decfsz	_loop_c, 1	// tail loop, also entered by CYCLES_SMALL
		goto	correction
	_endasm;
}
