I've updated my previous rough draft code a bit, once I found my mistake with the random register values. For example, the atomic add now looks like:
           static __inline__ void atomic_add(int i, atomic_t *v)
{
__asm__ __volatile__(
SMP_LOCK “add %1, %0, %0“
:“=m” (v->counter)
:"ir” (i), “m” (v->counter)) ; - this needs fixing/porting to proper aarch64
}
{
__asm__ __volatile__(
SMP_LOCK “add %1, %0, %0“
:“=m” (v->counter)
:"ir” (i), “m” (v->counter)) ; - this needs fixing/porting to proper aarch64
}
No need to provide actual registers since their own code is just using whichever ones are available to them. 
I've also manager to complete the port of subtract, subtract and test, increment, increment and test, decrement and decrement and test, which I've maybe inconveniently added below. It wasn't too much to change really. I just hope "=m" and "=qm" are still valid in aarch64 assembler, as I was not able to find replacements for them. The c-style comments are for blog readability and not actually in the code.
//subtract
static __inline__ void atomic_sub(int i, atomic_t *v)
{
__asm__ __volatile__(
SMP_LOCK “sub %2, %1“
: “=m” (v->counter)
:"ir” (i), “m” (v->counter));
}
{
__asm__ __volatile__(
SMP_LOCK “sub %2, %1“
: “=m” (v->counter)
:"ir” (i), “m” (v->counter));
}
//Subtract and test
static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
{
unsigned char c;
__asm__ __volatile__(
SMP_LOCK “sub %2, %0; beq %1“
:"=m” (v->counter), “=qm"(c)
: “ir” (i), “m” (v->counter) : “memory”);
return c;
}
//Increment
static __inline__ void atomic_inc(atomic_t *v)
{
__asm__ __volatile(
SMP_LOCK “add %0“
:"=m” (v->counter)
:"m” (v->counter));
}
static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
{
unsigned char c;
__asm__ __volatile__(
SMP_LOCK “sub %2, %0; beq %1“
:"=m” (v->counter), “=qm"(c)
: “ir” (i), “m” (v->counter) : “memory”);
return c;
}
//Increment
static __inline__ void atomic_inc(atomic_t *v)
{
__asm__ __volatile(
SMP_LOCK “add %0“
:"=m” (v->counter)
:"m” (v->counter));
}
//decrement
static __inline__ void atomic_dec(atomic_t *v)
{
__asm__ __volatile__(
SMP_LOCK "sub %0"
:"=m" (v->counter)
:"m" (v->counter));
}
static __inline__ void atomic_dec(atomic_t *v)
{
__asm__ __volatile__(
SMP_LOCK "sub %0"
:"=m" (v->counter)
:"m" (v->counter));
}
//Decrement and test
static __inline__ int atomic_dec_and_test(atomic_t *v)
{
unsigned char c;
__asm__ __volatile__(
SMP_LOCK "sub %0; beq %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
}
static __inline__ int atomic_dec_and_test(atomic_t *v)
{
unsigned char c;
__asm__ __volatile__(
SMP_LOCK "sub %0; beq %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
}
//increment and test
static __inline__ int atomic_inc_and_test(atomic_t *v)
{
unsigned char c;
__asm__ __volatile__(
SMP_LOCK "add %0; beq %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
}
static __inline__ int atomic_inc_and_test(atomic_t *v)
{
unsigned char c;
__asm__ __volatile__(
SMP_LOCK "add %0; beq %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
}
//Check to see if addition results in negative
static __inline__ int atomic_add_negative(int i, atomic_t *v)
{
unsigned char c;
__asm__ __volatile__(
SMP_LOCK "add %2,%0; bne %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
}
What remains of the atomic.h is a mask clear and set, and I need to track down the proper aarch64 versions of  logical 'andl' as well as 'orl', which thanks to the ARM pdf file we grabbed ages ago for class, have made themselves evident. I am not sure if they even require a port, since the code comments say they are x86 specific, Better safe than sorry I say (and say only for this particular instance). 
It's also not immediately clear if exclusive OR rather than inclusive OR needs to be used, which is another issue, but I would think the comments would mentioned if it wasn't inclusive. Inclusive it is (and firing off an email to the devs just to be sure).
//Mask code:
#define atomic_clear_mask(mask, addr) \
__asm__ __volatile__(
__asm__ __volatile__(
SMP_LOCK "AND %0,%1" \
: : "r" (~(mask)),"m" (*addr) : "memory")
#define atomic_set_mask(mask, addr) \
__asm__ __volatile__(SMP_LOCK "ORR %0,%1" \
: : "r" (mask),"m" (*addr) : "memory")
So for the purposes of SPO600, I do believe that's about all she wrote.
Ported atomics.h code for aarch64, the other asm is in dependency files, which obviously I have no control over, but all of which have aarch64/noarch versions for arm64 either out in the wild or just not in yum repositories (i'm looking at you, fftw3), and that odd "you're missing these tools" problem when I try to run the ./autoregen.sh.
I will likely still continue to work on this package over the summer with the community on my own time, and fix any issues with my code, and perhaps even submit it. Sadly, that can't be taken into account for marking purposes, but moral victories have value too
: : "r" (~(mask)),"m" (*addr) : "memory")
#define atomic_set_mask(mask, addr) \
__asm__ __volatile__(SMP_LOCK "ORR %0,%1" \
: : "r" (mask),"m" (*addr) : "memory")
So for the purposes of SPO600, I do believe that's about all she wrote.
Ported atomics.h code for aarch64, the other asm is in dependency files, which obviously I have no control over, but all of which have aarch64/noarch versions for arm64 either out in the wild or just not in yum repositories (i'm looking at you, fftw3), and that odd "you're missing these tools" problem when I try to run the ./autoregen.sh.
I will likely still continue to work on this package over the summer with the community on my own time, and fix any issues with my code, and perhaps even submit it. Sadly, that can't be taken into account for marking purposes, but moral victories have value too
