/* powerpc-sys5.s -- assembly support. */

/*
 * QuickThreads -- Threads-building toolkit.
 * Copyright (c) 1993 by David Keppel
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice and this notice
 * appear in all copies.  This software is provided as a
 * proof-of-concept and for demonstration purposes; there is no
 * representation about the suitability of this software for any
 * purpose.


 * PowerPC-System V thread switching module.
 * 
 * This software is largely based on the original PowerPC-Linux porting
 * developed by Ken Aaker <kenaaker@silverbacksystems.com>
 *
 * Marco Bucci <marco.bucci@inwind.it>
 * December 2002
 *
 */


/*
 * 
 * PowerPC Register conventions:
 * 
 *  r0			volatile
 *  r1			SP
 *  r2			system reserved
 *  r3-r4		volatile for parameter passing and function return
 *  r5-r10		volatile for parameter passing
 *  r11-r12		volatile
 *  r13-r31		non volatile registers
 *  f0			volatile
 *  f1			volatile for parameter passing and function return	
 *  f2-f13		volatile for parameter passing
 *  f14-f31		non volatile
 * 
 *  cr2-cr4		non volatile
 * 
 * 
 * See the header file for more documentation.
 *
 *
 * 
 * IMPLEMENTATION NOTES
 *
 *
 * 1) Condition register saving
 * On most machines, the condition code register is caller-save.
 * On the PPC, the condition code register is callee-save, so the
 * thread context switch must preserve it.
 * 
 * 
 * 2) Floating point registers saving
 * On resuming a thread, the floating point registers are restored or not
 * depending only on which block routine suspended the thread (i.e. regardless
 * of whether "qt_block", "qt_blocki" or "qt_abort" is used to resume it).
 * This behaviour is obtained by implementing "qt_block" by means of a nested
 * call to "qt_blocki". As a result, the blocking of a thread always goes
 * and returns through "qt_blocki" and, if a thread was blocked by "qt_block",
 * its execution resumes from the floating point restoring code on exit
 * of "qt_block".
 *
 * Thanks to David Keppel, who explained this "simple" trick to me.
 *
 *
 * 3) C language code debugging
 * The original version of this software was developed and debugged under
 * MacOS X using the Metrowerks Code Warrior PPC integrated assembler.
 * It can still be used with a C inline assembler by means of a suitable
 * file that includes it.
 * In order to avoid "copy and paste" bugs, and to make maintenance easier,
 * I made only minimal changes, so you can find some strange code such as:
 * 
 *   #if 0
 *   .if 0
 *      C code here
 *   .endif
 *   #endif
 *
 * This is just to embed some C code that is needed by the Code Warrior 
 * integrated assembler.
 *
 *
 * 4) Assembly constants generation
 * Constants used in the assembly code are generated by running
 * the program listed below (inside this comment). It uses the C macros
 * declared in the C header in order to guarantee that the C interface and
 * the assembly code are "aligned". I avoided the use of an assembler
 * preprocessor since they are not so standard and, moreover, using macro
 * expressions makes the assembly debugging more difficult.
 *
 *


#include <iostream>
#include "powerpc_sys5.h"

int main()
{
	using namespace std;
	
	int i;
	
	cout << ".set LR_SAVE, " << PPC_LR_SAVE << endl;
	cout << ".set BLOCKI_FSIZE, " << QUICKTHREADS_BLOCKI_FRAME_SIZE << endl;
	cout << ".set BLOCKI_CR_SAVE, " << QUICKTHREADS_BLOCKI_CR_SAVE << endl;
	cout << ".set BLOCK_FSIZE, " << QUICKTHREADS_BLOCK_FRAME_SIZE << endl;

	cout << endl;
	for(i=0; i<12; i++)
	 	cout << ".set PAR_" << i << ", " << PPC_PAR(i) << endl;

	cout << endl;
	i = 13;
	cout << ".set GPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCKI_GPR_SAVE(i) << endl;

	cout << endl;
	for(i=31; i>13; i--)
	 	cout << ".set FPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCK_FPR_SAVE(i) << endl;

	cout << endl;
	cout << ".set VARGS_BKOFF, " << QUICKTHREADS_VARGS_BKOFF << endl;
	
	
	cout << endl << endl << endl;
	
	for(i=31; i>13; i--)
	 	cout << "\tstfd\tf" << i << ",FPR_SAVE_" << i << "(%r1)" << endl;
	 	
	cout << endl;
	for(i=31; i>13; i--)
	 	cout << "\tlfd \tf" << i << ",FPR_SAVE_" << i << "(%r1)" << endl;
	 	
	cout << endl << endl << endl;
		
	
	return 0;
}



 *
 *
 *
 */
	

#if 0
/* Assembler-only section: section selection, exported symbols and the
 * numeric constants used by the routines below.  The "#if 0" / "#endif"
 * pair hides it from a C preprocessor (see implementation note 3 at the
 * top of this file). */

	.text
	.align 4
	
	/* every entry point is exported both with and without a leading "_" */
	.globl qt_block
	.globl _qt_block
	.globl qt_blocki
	.globl _qt_blocki
	.globl qt_abort
	.globl _qt_abort
	.globl qt_start
	.globl _qt_start
	.globl qt_vstart
	.globl _qt_vstart


/* Frame constants.  The values below are the output of the generator
 * program listed in implementation note 4 and must agree with the C header.
 *   LR_SAVE        offset of the LR save word from the base of a frame; it
 *                  is always used as LR_SAVE+<frame size>, i.e. it addresses
 *                  the frame of the caller
 *   BLOCKI_FSIZE   size of the frame allocated by "qt_blocki"
 *   BLOCKI_CR_SAVE offset of the CR save word in the "qt_blocki" frame
 *   BLOCK_FSIZE    size of the frame allocated by "qt_block" */
.set LR_SAVE, 4
.set BLOCKI_FSIZE, 96
.set BLOCKI_CR_SAVE, 8  /* CR is saved into the callee's stack frame */
.set BLOCK_FSIZE, 160

/* PAR_n: offset from the SP of the n-th parameter word (4 bytes each) */
.set PAR_0, 8
.set PAR_1, 12
.set PAR_2, 16
.set PAR_3, 20
.set PAR_4, 24
.set PAR_5, 28
.set PAR_6, 32
.set PAR_7, 36
.set PAR_8, 40
.set PAR_9, 44
.set PAR_10, 48
.set PAR_11, 52

/* offset in the "qt_blocki" frame where "stmw" stores r13 and the registers
 * that follow it */
.set GPR_SAVE_13, 20

/* FPR_SAVE_n: offset in the "qt_block" frame of the save slot of register
 * f<n> (8 bytes each, f14 lowest) */
.set FPR_SAVE_31, 152
.set FPR_SAVE_30, 144
.set FPR_SAVE_29, 136
.set FPR_SAVE_28, 128
.set FPR_SAVE_27, 120
.set FPR_SAVE_26, 112
.set FPR_SAVE_25, 104
.set FPR_SAVE_24, 96
.set FPR_SAVE_23, 88
.set FPR_SAVE_22, 80
.set FPR_SAVE_21, 72
.set FPR_SAVE_20, 64
.set FPR_SAVE_19, 56
.set FPR_SAVE_18, 48
.set FPR_SAVE_17, 40
.set FPR_SAVE_16, 32
.set FPR_SAVE_15, 24
.set FPR_SAVE_14, 16




/* various offsets used by "qt_vstart": the four fixed parameters found in
 * the parameter area when the thread starts */
.set P_T, PAR_0
.set P_STARTUP, PAR_1
.set P_USERF, PAR_2
.set P_CLEANUP, PAR_3
		/* the offset used to move back the linkage area so that it is
		 * adjacent to the variant argument list before calling
		 * "userf(...)".  It skips "t", "startup", "userf", "cleanup" and
		 * the first 8 parameters (since they are passed via registers):
		 * 12 words of 4 bytes each. */
.set VARGS_BKOFF, 48

		/* locations where "t" and "cleanup" are saved.  They are used as
		 * offsets from the backchain value, i.e. from the base of the
		 * stack frame. */
.set P_T_SAVE, -4
.set P_CLEANUP_SAVE, -8

#endif



/*
 * void *qt_blocki (void *helper, void *a0, void *a1, void *newthread)
 *
 * Block the current thread, saving only the integer non-volatile state
 * (LR, CR and r13-r31), then switch to "newthread" and run
 * helper(old, a0, a1) on its stack.
 *
 * In:   r3 = helper, r4 = a0, r5 = a1, r6 = newthread (the SP to switch to)
 * Out:  r3 = value returned by "helper"
 *
 * The epilog executes on the stack of the new thread, so the routine
 * returns to the "qt_blocki" caller of that thread.
 */
#if 0
.if 0
#endif
void *qt_blocki (void *helper, void *a0, void *a1, void *newthread);
asm void *qt_blocki (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

#if 0
qt_blocki:
_qt_blocki:
#endif
/* prolog: push a frame and save LR, CR and the non-volatile GPRs */
	stwu	%r1,-BLOCKI_FSIZE(%r1)		/* new frame; the old SP becomes its backchain */
	mflr	%r0
	stw	%r0,LR_SAVE+BLOCKI_FSIZE(%r1)	/* return addr, into the frame of the caller */
	mfcr	%r11
	stw	%r11,BLOCKI_CR_SAVE(%r1)	/* CR, into this frame */
	stmw	%r13,GPR_SAVE_13(%r1)		/* r13..r31, into this frame */

/* helper(qt_t *old, void *a0, void *a1): r4 and r5 still hold "a0" and "a1" */
	mtlr	%r3				/* LR = "helper", taken before r3 is reused */
	mr	%r3,%r1				/* old = SP of the thread being blocked */
	mr	%r1,%r6				/* from here on we run on the new stack */
	blrl					/* call "helper" */
/* r3 is not touched below, so the "helper" return value is our result */

/* epilog: unwind the frame found on the stack of the new thread */
	lwz	%r0,LR_SAVE+BLOCKI_FSIZE(%r1)
	mtlr	%r0				/* return address of the new thread */
	lwz	%r11,BLOCKI_CR_SAVE(%r1)
	mtcr	%r11				/* CR of the new thread */
	lmw	%r13,GPR_SAVE_13(%r1)		/* r13..r31 of the new thread */
	addi	%r1,%r1,BLOCKI_FSIZE		/* free the stack frame */
	blr					/* return */

#if 0
.if 0
#endif
}
#if 0
.endif
#endif



/*
 * void qt_abort (void *helper, void *a0, void *a1, void *newthread)
 *
 * Abort the current thread and start a new thread.  Nothing of the current
 * thread is saved: it will never come back.
 *
 * In:   r3 = helper, r4 = a0, r5 = a1, r6 = newthread (the SP to switch to)
 *
 * "helper" is called as helper(old, a0, a1), but "old" is just garbage
 * here: r3 is not reloaded, so "helper" receives its own address in it.
 */
#if 0
.if 0
#endif
void qt_abort (void *helper, void *a0, void *a1, void *newthread);
asm void qt_abort (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

#if 0
qt_abort:
_qt_abort:
#endif
/* no prolog: there is nothing to come back to */

/* helper(qt_t *old, void *a0, void *a1), executed on the new stack */
	mr	%r1,%r6				/* swap to the new thread (i.e. to its SP) */
	mtlr	%r3				/* LR = "helper"; r3 doubles as the dummy "old" */
	blrl					/* call "helper" */
/* r3 is not touched below, so the "helper" return value is returned */

/* epilog: return to the "qt_blocki" caller of the new thread */
	lwz	%r0,LR_SAVE+BLOCKI_FSIZE(%r1)
	mtlr	%r0				/* return address of the new thread */
	lwz	%r11,BLOCKI_CR_SAVE(%r1)
	mtcr	%r11				/* CR of the new thread */
	lmw	%r13,GPR_SAVE_13(%r1)		/* r13..r31 of the new thread */
	addi	%r1,%r1,BLOCKI_FSIZE		/* free the stack frame */
	blr					/* return */

#if 0
.if 0
#endif
}
#if 0
.endif
#endif



/*
 * void *qt_block (void *helper, void *a0, void *a1, void *newthread)
 *
 * Block the current thread saving all non-volatile registers and start a
 * new thread.  The floating point registers f14-f31 are saved in a frame
 * of their own; everything else is delegated to a nested call to
 * "qt_blocki", which finds the four arguments still in r3-r6.
 *
 * Out:  r3 = value left by "qt_blocki" (not touched on the way out)
 *
 * A thread blocked here resumes right after the "bl", i.e. at the floating
 * point restoring code.
 */
#if 0
.if 0
#endif
void *qt_block (void *helper, void *a0, void *a1, void *newthread);
asm void *qt_block (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

#if 0
qt_block:
_qt_block:
#endif
/* prolog: push a frame and save the return address */
	stwu	%r1,-BLOCK_FSIZE(%r1)		/* allocate the stack frame */
	mflr	%r0
	stw	%r0,LR_SAVE+BLOCK_FSIZE(%r1)	/* return addr, into the frame of the caller */

/* save the non-volatile fp registers, lowest slot first */
	stfd	%f14,FPR_SAVE_14(%r1)
	stfd	%f15,FPR_SAVE_15(%r1)
	stfd	%f16,FPR_SAVE_16(%r1)
	stfd	%f17,FPR_SAVE_17(%r1)
	stfd	%f18,FPR_SAVE_18(%r1)
	stfd	%f19,FPR_SAVE_19(%r1)
	stfd	%f20,FPR_SAVE_20(%r1)
	stfd	%f21,FPR_SAVE_21(%r1)
	stfd	%f22,FPR_SAVE_22(%r1)
	stfd	%f23,FPR_SAVE_23(%r1)
	stfd	%f24,FPR_SAVE_24(%r1)
	stfd	%f25,FPR_SAVE_25(%r1)
	stfd	%f26,FPR_SAVE_26(%r1)
	stfd	%f27,FPR_SAVE_27(%r1)
	stfd	%f28,FPR_SAVE_28(%r1)
	stfd	%f29,FPR_SAVE_29(%r1)
	stfd	%f30,FPR_SAVE_30(%r1)
	stfd	%f31,FPR_SAVE_31(%r1)

/* block the thread: the integer state is handled by "qt_blocki" */
	bl	qt_blocki

/* the thread is being resumed: reload the non-volatile fp registers */
	lfd	%f14,FPR_SAVE_14(%r1)
	lfd	%f15,FPR_SAVE_15(%r1)
	lfd	%f16,FPR_SAVE_16(%r1)
	lfd	%f17,FPR_SAVE_17(%r1)
	lfd	%f18,FPR_SAVE_18(%r1)
	lfd	%f19,FPR_SAVE_19(%r1)
	lfd	%f20,FPR_SAVE_20(%r1)
	lfd	%f21,FPR_SAVE_21(%r1)
	lfd	%f22,FPR_SAVE_22(%r1)
	lfd	%f23,FPR_SAVE_23(%r1)
	lfd	%f24,FPR_SAVE_24(%r1)
	lfd	%f25,FPR_SAVE_25(%r1)
	lfd	%f26,FPR_SAVE_26(%r1)
	lfd	%f27,FPR_SAVE_27(%r1)
	lfd	%f28,FPR_SAVE_28(%r1)
	lfd	%f29,FPR_SAVE_29(%r1)
	lfd	%f30,FPR_SAVE_30(%r1)
	lfd	%f31,FPR_SAVE_31(%r1)

/* epilog */
	lwz	%r0,LR_SAVE+BLOCK_FSIZE(%r1)	/* recover the return address */
	mtlr	%r0
	addi	%r1,%r1,BLOCK_FSIZE		/* free the stack frame */
	blr					/* return */

#if 0
.if 0
#endif
}
#if 0
.endif
#endif

	
	
/*
 * void qt_start (void)
 *
 * Start a single argument thread using the parameters preloaded in the
 * stack during thread initialization (see the comments on stack
 * initialization in the header file).
 *
 * Executes:
 *
 *    only(u, t, userf);
 *
 * If "only" ever returns, control is passed to "qt_error".
 */
#if 0
.if 0
#endif
void qt_start(void);
asm void qt_start(void)
{
#if 0
.endif
#endif

#if 0
qt_start:
_qt_start:
#endif
/* fetch the call target first, then the three arguments */
	lwz	%r6,PAR_3(%r1)			/* "only" in r6 */
	mtlr	%r6				/* "only" address in the link reg */
	lwz	%r3,PAR_0(%r1)			/* 1st arg: "u" */
	lwz	%r4,PAR_1(%r1)			/* 2nd arg: "t" */
	lwz	%r5,PAR_2(%r1)			/* 3rd arg: "userf" */
	blrl					/* only(u, t, userf) */
/* error if it returns */
	b	qt_error
/* dead code (some inline assemblers "want" the epilog, or they generate it) */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif



/* Start a variant argument thread using parameters preloaded in the stack
 * during thread initialization (see comments on stack initialization in the
 * header file).
 *
 * Executes:
 *
 *    startup(t);
 *    userf_return = userf(...);
 *    cleanup(pt, userf_return);
 *


 ***** Stack layout on start *****


 backchain ->           STACK BOTTOM (higher address)
                        +==========================+
 backchain - 4 ->       |                          | 
                        +   LOCAL VARIABLES AREA   +
                               ..............
                        +                          +
                        |                          |
                        +--------------------------+
                        |                          | 
                        +      ALIGNMENT PAD       +
                               ..............
                        +       (if needed)        +
                        |                          |
                        +--------------------------+
                        |                          | arg(n)
                        +                          +
                        |                          | 
                        +  VARIABLE ARGUMENT LIST  +
                               ..............
                        +      for userf call      +
 SP + PAR(5) ->         |                          | arg(1)
                        +                          +
 SP + PAR(4) ->         |                          | arg(0)
                        +--------------------------+
 SP + PAR(3) ->         |                          | cleanup par
                        +                          +
 SP + PAR(2) ->         |                          | userf par
                        +      PARAMETER AREA      +
 SP + PAR(1) ->         |                          | startup par
                        +                          +
 SP + PAR(0) ->         |                          | t par
                        +--------------------------+
                        |                          |
                        +       LINKAGE AREA       +
 SP ->                  |                          |
                        +==========================+
                         STACK TOP (lower address)

                             Stack grows down
                                     |
                                     V



 ***** Stack layout before call userf *****


 backchain ->           STACK BOTTOM (higher address)
                        +==========================+
 backchain - 4 ->       |                          | 
                        +   LOCAL VARIABLES AREA   +
                               ..............
                        +                          +
                        |                          |
                        +--------------------------+
                        |                          | 
                        +      ALIGNMENT PAD       +
                               ..............
                        +       (if needed)        +
                        |                          |
                        +--------------------------+
                        |                          | arg(n)
                        +                          +
                        |                          | 
                        +  VARIABLE ARGUMENT LIST  +
                               ..............
                        +      for userf call      +
 SP + PAR(1) ->         |                          | arg(9)
                        +                          +
 SP + PAR(0) ->         |                          | arg(8)
                        +--------------------------+
                        |                          |
                        +       LINKAGE AREA       +
 SP ->                  |                          |
                        +==========================+
                         STACK TOP (lower address)

                             Stack grows down
                                     |
                                     V


 * To call "userf(...)", the argument list must be adjacent to the linkage
 * area. Instead of copying the argument list, we move back the linkage area
 * (actually, we just increase the SP and copy the backchain). "t" and
 * "cleanup" are saved in a local variable area in order to call
 * cleanup(pt, userf_return).

*/
		

#if 0
.if 0
#endif
void qt_vstart(void);
asm void qt_vstart(void)
{
#if 0
.endif
#endif

#if 0
qt_vstart:
_qt_vstart:
#endif
/*
 * On entry "t", "startup", "userf" and "cleanup" are in PAR(0)..PAR(3) and
 * the variant argument list of "userf" starts at PAR(4).
 *
 * NOTICE: a callee could save its parameter registers in the parameter
 * area of its caller.  "t" is kept in PAR(0) so that, if startup(t) saves
 * "t", it goes to the very same location and no other parameter is
 * overwritten.
 *
 * Since the linkage area is moved back later on (to make it adjacent to the
 * parameter list), "t" and "cleanup" have to be saved first.  Room for
 * them has been made at the bottom of the stack frame.
 */

/* save "t" and "cleanup" in the local variable area */
	lwz	%r3,P_T(%r1)			/* r3 = "t": also the argument of "startup" */
	lwz	%r4,P_CLEANUP(%r1)
	lwz	%r11,0(%r1)			/* r11 = backchain */
	stw	%r4,P_CLEANUP_SAVE(%r11)	/* save "cleanup" */
	stw	%r3,P_T_SAVE(%r11)		/* save "pt" */

/* call startup(t) */
	lwz	%r5,P_STARTUP(%r1)
	mtlr	%r5
	blrl					/* call "startup" */

/* call userf(...) */
	lwz	%r4,P_USERF(%r1)
	mtlr	%r4				/* LR = "userf"; r4 is reloaded just below */
	lwz	%r11,0(%r1)			/* reload the backchain (r11 is volatile) */

	/* the first eight parameters of the variant list must be copied into
	 * GPR3-GPR10.  There is a four places offset due to "t", "startup",
	 * "userf" and "cleanup" */
	lwz	%r3,PAR_4(%r1)
	lwz	%r4,PAR_5(%r1)
	lwz	%r5,PAR_6(%r1)
	lwz	%r6,PAR_7(%r1)
	lwz	%r7,PAR_8(%r1)
	lwz	%r8,PAR_9(%r1)
	lwz	%r9,PAR_10(%r1)
	lwz	%r10,PAR_11(%r1)

	/* move the linkage area so that it is adjacent to the argument list.
	 * This has to follow the loads above: the backchain copy lands on the
	 * word that was just read as PAR_10 */
	stw	%r11,VARGS_BKOFF(%r1)		/* copy the backchain */
	addi	%r1,%r1,VARGS_BKOFF		/* move back the stack */

	blrl					/* call "userf" */

/* call cleanup(void *pt, void *vuserf_return) */
	mr	%r4,%r3				/* "userf" return value as 2nd parameter */
	lwz	%r11,0(%r1)			/* reload the backchain (r11 is volatile) */
	lwz	%r5,P_CLEANUP_SAVE(%r11)	/* reload "cleanup" */
	mtlr	%r5
	lwz	%r3,P_T_SAVE(%r11)		/* reload "pt" as 1st parameter */
	blrl					/* call "cleanup" */
/* reached only if "cleanup" returns */
	b	qt_error
/* dead code (some inline assemblers "want" the epilog, or they generate it) */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif