From 3fff6023602822531efdae30bc8ebf862967f1ef Mon Sep 17 00:00:00 2001
From: Friedrich Beckmann <friedrich.beckmann@hs-augsburg.de>
Date: Mon, 25 Jul 2022 17:55:39 +0200
Subject: Initial Commit

---
 .../src/test/scala/vexriscv/ip/fpu/FpuTest.scala   | 1663 ++++++++++++++++++++
 .../test/scala/vexriscv/ip/fpu/Playground.scala    |   47 +
 2 files changed, 1710 insertions(+)
 create mode 100644 VexRiscv/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
 create mode 100644 VexRiscv/src/test/scala/vexriscv/ip/fpu/Playground.scala

(limited to 'VexRiscv/src/test/scala/vexriscv/ip')

diff --git a/VexRiscv/src/test/scala/vexriscv/ip/fpu/FpuTest.scala b/VexRiscv/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
new file mode 100644
index 0000000..11182ac
--- /dev/null
+++ b/VexRiscv/src/test/scala/vexriscv/ip/fpu/FpuTest.scala
@@ -0,0 +1,1663 @@
+package vexriscv.ip.fpu
+
+import java.io.File
+import java.lang
+import java.util.Scanner
+
+import org.apache.commons.io.FileUtils
+import org.scalatest.funsuite.AnyFunSuite
+import spinal.core.SpinalEnumElement
+import spinal.core.sim._
+import spinal.core._
+import spinal.lib.DoCmd
+import spinal.lib.experimental.math.Floating
+import spinal.lib.sim._
+import spinal.sim.Backend.{isMac, isWindows}
+
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuffer
+import scala.sys.process.ProcessLogger
+import scala.util.Random
+import org.scalatest.funsuite.AnyFunSuite
+
+//TODO Warning DataCache write aggregation will disable itself
+class FpuTest extends AnyFunSuite{
+
+  val b2f = lang.Float.intBitsToFloat(_)
+  val b2d = lang.Double.longBitsToDouble(_)
+  val f2b = lang.Float.floatToRawIntBits(_)
+  val d2bOffset = BigInt("10000000000000000",16)
+  def d2b(that : Double) = {
+    val l = lang.Double.doubleToRawLongBits(that)
+    var a = BigInt(l)
+    if(l < 0) {
+      a = d2bOffset + a
+    }
+    a
+  }
+
+
+  test("f32f64") {
+    val p = FpuParameter(
+      withDouble = true,
+//      withAdd = false,
+//      withMul = false,
+//      withDivSqrt = false,
+      sim = true
+    )
+    testP(p)
+  }
+  test("f32") {
+    val p = FpuParameter(
+      withDouble = false,
+      sim = true
+    )
+    testP(p)
+  }
+
+  def testP(p : FpuParameter){
+    val portCount = 4
+
+    val config = SimConfig
+    config.allOptimisation
+//    config.withFstWave
+    config.compile(new FpuCore(portCount, p){
+      for(i <- 0 until portCount) out(Bits(5 bits)).setName(s"flagAcc$i") := io.port(i).completion.flags.asBits
+      setDefinitionName("FpuCore"+ (if(p.withDouble) "Double" else  ""))
+    }).doSim(seed = 42){ dut =>
+      dut.clockDomain.forkStimulus(10)
+      dut.clockDomain.forkSimSpeedPrinter(5.0)
+
+
+      class TestCase(op : String){
+        def build(arg : String) = new ProcessStream(s"testfloat_gen $arg -tininessafter -forever -$op"){
+          def f32_f32_f32 ={
+            val s = new Scanner(next)
+            val a,b,c = (s.nextLong(16).toInt)
+            (b2f(a), b2f(b), b2f(c), s.nextInt(16))
+          }
+
+          def i32_f32 ={
+            val s = new Scanner(next)
+            (s.nextLong(16).toInt, b2f(s.nextLong(16).toInt), s.nextInt(16))
+          }
+
+          def f32_i32 = {
+            val s = new Scanner(next)
+            (b2f(s.nextLong(16).toInt), s.nextLong(16).toInt, s.nextInt(16))
+          }
+
+          def f32_f32_i32 = {
+            val s = new Scanner(next)
+            val a,b,c = (s.nextLong(16).toInt)
+            (b2f(a), b2f(b), c, s.nextInt(16))
+          }
+
+          def f32_f32 = {
+            val s = new Scanner(next)
+            val a,b = (s.nextLong(16).toInt)
+            (b2f(a), b2f(b), s.nextInt(16))
+          }
+
+
+          def nextLong(s : Scanner) : Long = java.lang.Long.parseUnsignedLong( s.next(),16)
+
+          def f64_f64_f64 ={
+            val s = new Scanner(next)
+            val a,b,c = nextLong(s)
+            (b2d(a), b2d(b), b2d(c), s.nextInt(16))
+          }
+
+          def i32_f64 ={
+            val s = new Scanner(next)
+            (s.nextLong(16).toInt, b2d(nextLong(s)), s.nextInt(16))
+          }
+
+          def f64_i32 = {
+            val s = new Scanner(next)
+            (b2d(nextLong(s)), s.nextLong(16).toInt, s.nextInt(16))
+          }
+
+          def f64_f64_i32 = {
+            val str = next
+            val s = new Scanner(str)
+            val a,b = (nextLong(s))
+            (b2d(a), b2d(b), s.nextInt(16), s.nextInt(16))
+          }
+
+          def f64_f64 = {
+            val s = new Scanner(next)
+            val a,b = nextLong(s)
+            (b2d(a), b2d(b), s.nextInt(16))
+          }
+
+
+          def f32_f64_i32 = {
+            val s = new Scanner(next)
+            val a,b = nextLong(s)
+            (b2f(a.toInt), b2d(b),  s.nextInt(16))
+          }
+          def f64_f32_i32 = {
+            val s = new Scanner(next)
+            val a,b = nextLong(s)
+            (b2d(a), b2f(b.toInt), s.nextInt(16))
+          }
+        }
+        lazy val RAW = build("")
+        lazy val RNE = build("-rnear_even")
+        lazy val RTZ = build("-rminMag")
+        lazy val RDN = build("-rmin")
+        lazy val RUP = build("-rmax")
+        lazy val RMM = build("-rnear_maxMag")
+        lazy val all = List(RNE, RTZ, RDN, RUP, RMM, RAW)
+        def kill = all.foreach(_.kill)
+        def apply(rounding : FpuRoundMode.E) = rounding match {
+          case FpuRoundMode.RNE => RNE
+          case FpuRoundMode.RTZ => RTZ
+          case FpuRoundMode.RDN => RDN
+          case FpuRoundMode.RUP => RUP
+          case FpuRoundMode.RMM => RMM
+        }
+      }
+
+      class TestVector(f : String) {
+        val add = new TestCase(s"${f}_add")
+        val sub = new TestCase(s"${f}_sub")
+        val mul = new TestCase(s"${f}_mul")
+        val ui2f = new TestCase(s"ui32_to_${f}")
+        val i2f = new TestCase(s"i32_to_${f}")
+        val f2ui = new TestCase(s"${f}_to_ui32 -exact")
+        val f2i = new TestCase(s"${f}_to_i32 -exact")
+        val eq = new TestCase(s"${f}_eq")
+        val lt = new TestCase(s"${f}_lt")
+        val le = new TestCase(s"${f}_le")
+        val min = new TestCase(s"${f}_le")
+        val max = new TestCase(s"${f}_lt")
+        val transfer = new TestCase(s"${f}_eq")
+        val fclass = new TestCase(s"${f}_eq")
+        val sgnj = new TestCase(s"${f}_eq")
+        val sgnjn = new TestCase(s"${f}_eq")
+        val sgnjx = new TestCase(s"${f}_eq")
+        val sqrt = new TestCase(s"${f}_sqrt")
+        val div = new TestCase(s"${f}_div")
+      }
+
+      val f32 = new TestVector("f32"){
+        val f64 = new TestCase(s"f32_eq")
+        val cvt64 = new TestCase(s"f32_to_f64")
+      }
+      val f64 = new TestVector("f64"){
+        val f32 = new TestCase(s"f64_eq")
+        val cvt32 = new TestCase(s"f64_to_f32")
+      }
+
+      val cpus = for(id <- 0 until portCount) yield new {
+        val cmdQueue = mutable.Queue[FpuCmd => Unit]()
+        val commitQueue = mutable.Queue[FpuCommit => Unit]()
+        val rspQueue = mutable.Queue[FpuRsp => Unit]()
+
+        var pendingMiaou = 0
+        var flagAccumulator = 0
+
+        def cmdAdd(body : FpuCmd => Unit): Unit ={
+          pendingMiaou += 1
+          cmdQueue += body
+        }
+
+        def softAssert(cond : Boolean, msg : String) = if(!cond)println(msg)
+        def flagMatch(ref : Int, value : Float, report : String): Unit ={
+          val patch = if(value.abs == 1.17549435E-38f && false) 0x1f & ~2 else 0x1f
+          flagMatch(ref, report, patch)
+        }
+
+        def flagMatch(ref : Int, value : Double, report : String): Unit ={
+          val patch = if(value.abs == b2d(1 << 52) && false) 0x1f & ~2 else 0x1f
+          flagMatch(ref, report, patch)
+        }
+
+        def flagMatch(ref : Int, report : String, mask : Int = 0x1f): Unit ={
+          waitUntil(pendingMiaou == 0)
+          assert((flagAccumulator & mask) == (ref & mask), s"Flag missmatch dut=$flagAccumulator ref=$ref $report")
+          flagAccumulator = 0
+        }
+        def flagClear(): Unit ={
+          waitUntil(pendingMiaou == 0)
+          flagAccumulator = 0
+        }
+
+        val flagAggregated = dut.reflectBaseType(s"flagAcc$id").asInstanceOf[Bits]
+        dut.clockDomain.onSamplings{
+          val c = dut.io.port(id).completion
+          if(c.valid.toBoolean) {
+            pendingMiaou -= 1
+            flagAccumulator |= flagAggregated.toInt
+          }
+          dut.writeback.randomSim.randomize()
+        }
+
+        StreamDriver(dut.io.port(id).cmd ,dut.clockDomain){payload =>
+          if(cmdQueue.isEmpty) false else {
+            cmdQueue.dequeue().apply(payload)
+            true
+          }
+        }
+
+
+        StreamMonitor(dut.io.port(id)rsp, dut.clockDomain){payload =>
+          pendingMiaou -= 1
+          if(payload.NV.toBoolean) flagAccumulator |= 1 << 4
+          if(payload.NX.toBoolean) flagAccumulator |= 1 << 0
+          rspQueue.dequeue().apply(payload)
+        }
+
+        StreamReadyRandomizer(dut.io.port(id).rsp, dut.clockDomain)
+
+
+        StreamDriver(dut.io.port(id).commit ,dut.clockDomain){payload =>
+          if(commitQueue.isEmpty) false else {
+            commitQueue.dequeue().apply(payload)
+            true
+          }
+        }
+
+
+
+
+        def loadRaw(rd : Int, value : BigInt, format : FpuFormat.E): Unit ={
+          cmdAdd {cmd =>
+            cmd.opcode #= cmd.opcode.spinalEnum.LOAD
+            cmd.rs1.randomize()
+            cmd.rs2.randomize()
+            cmd.rs3.randomize()
+            cmd.rd #= rd
+            cmd.arg.randomize()
+            cmd.roundMode.randomize()
+            cmd.format #= format
+          }
+          commitQueue += {cmd =>
+            cmd.write #= true
+            cmd.rd #= rd
+            cmd.value #= value
+            cmd.opcode #= cmd.opcode.spinalEnum.LOAD
+          }
+        }
+
+
+        def load(rd : Int, value : Float): Unit ={
+          loadRaw(rd, f2b(value).toLong & 0xFFFFFFFFl, FpuFormat.FLOAT)
+        }
+
+        def load(rd : Int, value : Double): Unit ={
+          loadRaw(rd, d2b(value), FpuFormat.DOUBLE)
+        }
+
+        def storeRaw(rs : Int, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={
+          cmdAdd {cmd =>
+            cmd.opcode #= cmd.opcode.spinalEnum.STORE
+            cmd.rs1.randomize()
+            cmd.rs2 #= rs
+            cmd.rs3.randomize()
+            cmd.rd.randomize()
+            cmd.arg.randomize()
+            cmd.roundMode.randomize()
+            cmd.format #= format
+          }
+
+          rspQueue += body
+//          waitUntil(rspQueue.isEmpty)
+        }
+
+        def storeFloat(rs : Int)(body : Float => Unit): Unit ={
+          storeRaw(rs, FpuFormat.FLOAT){rsp => body(b2f(rsp.value.toBigInt.toInt))}
+        }
+        def store(rs : Int)(body : Double => Unit): Unit ={
+          storeRaw(rs, FpuFormat.DOUBLE){rsp => body(b2d(rsp.value.toBigInt.toLong))}
+        }
+
+        def fpuF2f(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={
+          cmdAdd {cmd =>
+            cmd.opcode #= opcode
+            cmd.rs1 #= rs1
+            cmd.rs2 #= rs2
+            cmd.rs3 #= rs3
+            cmd.rd #= rd
+            cmd.arg #= arg
+            cmd.roundMode #= rounding
+            cmd.format #= format
+          }
+          commitQueue += {cmd =>
+            cmd.write #= true
+            cmd.rd #= rd
+            cmd.opcode #= opcode
+          }
+        }
+
+        def fpuF2i(rs1 : Int, rs2 : Int, opcode : FpuOpcode.E, arg : Int, rounding : FpuRoundMode.E, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={
+          cmdAdd {cmd =>
+            cmd.opcode #= opcode
+            cmd.rs1 #= rs1
+            cmd.rs2 #= rs2
+            cmd.rs3.randomize()
+            cmd.rd.randomize()
+            cmd.arg #= arg
+            cmd.roundMode #= rounding
+            cmd.format #= format
+          }
+          rspQueue += body
+        }
+
+
+        def mul(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={
+          fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.MUL, 0, rounding, format)
+        }
+
+        def add(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={
+          fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 0, rounding, format)
+        }
+
+        def sub(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={
+          fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.ADD, 1, rounding, format)
+        }
+
+        def div(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={
+          fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.DIV, Random.nextInt(4), rounding, format)
+        }
+
+        def sqrt(rd : Int, rs1 : Int, rounding : FpuRoundMode.E = FpuRoundMode.RNE, format : FpuFormat.E): Unit ={
+          fpuF2f(rd, rs1, Random.nextInt(32), Random.nextInt(32), FpuOpcode.SQRT, Random.nextInt(4), rounding, format)
+        }
+
+        def fma(rd : Int, rs1 : Int, rs2 : Int, rs3 : Int, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={
+          fpuF2f(rd, rs1, rs2, rs3, FpuOpcode.FMA, 0, rounding, format)
+        }
+
+        def sgnjRaw(rd : Int, rs1 : Int, rs2 : Int, arg : Int, format : FpuFormat.E): Unit ={
+          fpuF2f(rd, rs1, rs2, Random.nextInt(32), FpuOpcode.SGNJ, arg, FpuRoundMode.elements.randomPick(), format)
+        }
+
+        def sgnj(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={
+          sgnjRaw(rd, rs1, rs2, 0, format)
+        }
+        def sgnjn(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={
+          sgnjRaw(rd, rs1, rs2, 1, format)
+        }
+        def sgnjx(rd : Int, rs1 : Int, rs2 : Int, rounding : FpuRoundMode.E = null, format : FpuFormat.E): Unit ={
+          sgnjRaw(rd, rs1, rs2, 2, format)
+        }
+
+        def cmp(rs1 : Int, rs2 : Int, arg : Int, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={
+          fpuF2i(rs1, rs2, FpuOpcode.CMP, arg, FpuRoundMode.elements.randomPick(), format)(body)
+        }
+
+        def f2i(rs1 : Int, signed : Boolean, rounding : FpuRoundMode.E, format : FpuFormat.E)(body : FpuRsp => Unit): Unit ={
+          fpuF2i(rs1, Random.nextInt(32), FpuOpcode.F2I, if(signed) 1 else 0, rounding, format)(body)
+        }
+
+        def i2f(rd : Int, value : Int, signed : Boolean, rounding : FpuRoundMode.E, format : FpuFormat.E): Unit ={
+          cmdAdd {cmd =>
+            cmd.opcode #= cmd.opcode.spinalEnum.I2F
+            cmd.rs1.randomize()
+            cmd.rs2.randomize()
+            cmd.rs3.randomize()
+            cmd.rd #= rd
+            cmd.arg #= (if(signed) 1 else 0)
+            cmd.roundMode #= rounding
+            cmd.format #= format
+          }
+          commitQueue += {cmd =>
+            cmd.write #= true
+            cmd.rd #= rd
+            cmd.opcode #= FpuOpcode.I2F
+            cmd.value #= value.toLong & 0xFFFFFFFFl
+          }
+        }
+
+        def fmv_x_w(rs1 : Int)(body : Float => Unit): Unit ={
+          cmdAdd {cmd =>
+            cmd.opcode #= cmd.opcode.spinalEnum.FMV_X_W
+            cmd.rs1 #= rs1
+            cmd.rs2.randomize()
+            cmd.rs3.randomize()
+            cmd.rd.randomize()
+            cmd.arg #= 0
+            cmd.roundMode.randomize()
+            cmd.format #= FpuFormat.FLOAT
+          }
+          rspQueue += {rsp => body(b2f(rsp.value.toBigInt.toInt))}
+        }
+
+        def fmv_w_x(rd : Int, value : Int): Unit ={
+          cmdAdd {cmd =>
+            cmd.opcode #= cmd.opcode.spinalEnum.FMV_W_X
+            cmd.rs1.randomize()
+            cmd.rs2.randomize()
+            cmd.rs3.randomize()
+            cmd.rd #= rd
+            cmd.arg #= 0
+            cmd.roundMode.randomize()
+            cmd.format #= FpuFormat.FLOAT
+          }
+          commitQueue += {cmd =>
+            cmd.write #= true
+            cmd.rd #= rd
+            cmd.opcode #= FpuOpcode.FMV_W_X
+            cmd.value #= value.toLong & 0xFFFFFFFFl
+          }
+        }
+
+        def minMax(rd : Int, rs1 : Int, rs2 : Int, arg : Int, format : FpuFormat.E): Unit ={
+          cmdAdd {cmd =>
+            cmd.opcode #= cmd.opcode.spinalEnum.MIN_MAX
+            cmd.rs1 #= rs1
+            cmd.rs2 #= rs2
+            cmd.rs3.randomize()
+            cmd.rd #= rd
+            cmd.arg #= arg
+            cmd.roundMode.randomize()
+            cmd.format #= format
+          }
+          commitQueue += {cmd =>
+            cmd.write #= true
+            cmd.rd #= rd
+            cmd.opcode #= FpuOpcode.MIN_MAX
+          }
+        }
+
+
+
+        def fclass(rs1 : Int, format : FpuFormat.E)(body : Int => Unit) : Unit = {
+          cmdAdd {cmd =>
+            cmd.opcode #= FpuOpcode.FCLASS
+            cmd.rs1 #= rs1
+            cmd.rs2.randomize()
+            cmd.rs3.randomize()
+            cmd.rd.randomize()
+            cmd.arg.randomize()
+            cmd.roundMode.randomize()
+            cmd.format #= format
+          }
+          rspQueue += {rsp => body(rsp.value.toBigInt.toInt)}
+        }
+      }
+
+
+
+
+
+      val stim = for(cpu <- cpus) yield fork {
+        import cpu._
+
+        class RegAllocator(){
+          var value = 0
+
+          def allocate(): Int ={
+            while(true){
+              val rand = Random.nextInt(32)
+              val mask = 1 << rand
+              if((value & mask) == 0) {
+                value |= mask
+                return rand
+              }
+            }
+            0
+          }
+        }
+        def checkFloat(ref : Float, dut : Float): Boolean ={
+          if((f2b(ref) & 0x80000000) != (f2b(dut) & 0x80000000)) return  false
+          if(ref == 0.0 && dut == 0.0 && f2b(ref) != f2b(dut)) return false
+          if(ref.isNaN && dut.isNaN) return true
+          if(ref == dut) return true
+          if(ref.abs * 1.0001f + Float.MinPositiveValue >= dut.abs*0.9999f && ref.abs * 0.9999f - Float.MinPositiveValue  <= dut.abs*1.0001f) return true
+//          if(ref + Float.MinPositiveValue*2.0f  === dut || dut + Float.MinPositiveValue*2.0f  === ref)
+          false
+        }
+
+        def checkDouble(ref : Double, dut : Double): Boolean ={
+          if((d2b(ref) & Long.MinValue) != (d2b(dut) & Long.MinValue)) return  false
+          if(ref == 0.0 && dut == 0.0 && d2b(ref) != d2b(dut)) return false
+          if(ref.isNaN && dut.isNaN) return true
+          if(ref == dut) return true
+          if(ref.abs * 1.0001 + Float.MinPositiveValue >= dut.abs*0.9999 && ref.abs * 0.9999 - Double.MinPositiveValue  <= dut.abs*1.0001) return true
+          //          if(ref + Float.MinPositiveValue*2.0f  === dut || dut + Float.MinPositiveValue*2.0f  === ref)
+          false
+        }
+        def checkFloatExact(ref : Float, dut : Float): Boolean ={
+          if(ref.signum != dut.signum === dut) return  false
+          if(ref.isNaN && dut.isNaN) return true
+          if(ref == dut) return true
+          false
+        }
+
+
+        def randomFloat(): Float ={
+          val exp = Random.nextInt(10)-5
+          (Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0)).toFloat
+        }
+
+        def randomDouble(): Double ={
+          val exp = Random.nextInt(10)-5
+          (Random.nextDouble() * (Math.pow(2.0, exp)) * (if(Random.nextBoolean()) -1.0 else 1.0))
+        }
+
+
+        def testBinaryOp(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Float, b : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2, rs3 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+          load(rs2, b)
+          op(rd,rs1,rs2, rounding, FpuFormat.FLOAT)
+          storeFloat(rd){v =>
+            assert(f2b(v) == f2b(ref), f"## ${a}  ${opName}  $b = $v, $ref $rounding")
+          }
+
+          flagMatch(flag, ref, f"## ${opName} ${a} $b $ref $rounding")
+        }
+
+
+        def testBinaryOpF64(op : (Int,Int,Int,FpuRoundMode.E, FpuFormat.E) => Unit, a : Double, b : Double, ref : Double, flag : Int, rounding : FpuRoundMode.E, opName : String): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2, rs3 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+          load(rs2, b)
+          op(rd,rs1,rs2, rounding, FpuFormat.DOUBLE)
+          store(rd){v =>
+            assert(d2b(v) == d2b(ref), f"## ${a}  ${opName}  $b = $v, $ref $rounding, ${d2b(a).toString(16)} ${d2b(b).toString(16)} ${d2b(ref).toString(16)}")
+          }
+
+          flagMatch(flag, ref, f"## ${opName} ${a} $b $ref $rounding")
+        }
+
+
+        def testTransferF32Raw(a : Float, iSrc : Boolean, iDst : Boolean): Unit ={
+          val rd = Random.nextInt(32)
+
+          def handle(v : Float): Unit ={
+            val ref = a
+            assert(f2b(v) == f2b(ref), f"$a = $v, $ref")
+          }
+
+          if(iSrc) fmv_w_x(rd, f2b(a)) else load(rd, a)
+          if(iDst) fmv_x_w(rd)(handle) else storeFloat(rd)(handle)
+
+          flagMatch(0, f"$a")
+        }
+
+
+        def testTransferF64Raw(a : Double): Unit ={
+          val rd = Random.nextInt(32)
+
+          def handle(v : Double): Unit ={
+            val ref = a
+            assert(d2b(v) == d2b(ref), f"$a = $v, $ref")
+          }
+
+          load(rd, a)
+          store(rd)(handle)
+
+          flagMatch(0, f"$a")
+        }
+
+        def testTransferF32F64Raw(a : Float,  iSrc : Boolean): Unit ={
+          val rd = Random.nextInt(32)
+          if(iSrc) fmv_w_x(rd, f2b(a)) else load(rd, a)
+          storeRaw(rd, FpuFormat.DOUBLE){rsp =>
+            val v = rsp.value.toBigInt.toLong
+            val ref = (0xFFFFFFFFl << 32) | f2b(a)
+            assert(v == ref, f"$a = $v, $ref")
+          }
+          flagMatch(0, f"$a")
+        }
+
+        def testTransferF64F32Raw(a : Double,  iDst : Boolean): Unit ={
+          val rd = Random.nextInt(32)
+          load(rd, a)
+          if(iDst)fmv_x_w(rd){v_ =>
+            val v = f2b(v_).toLong & 0xFFFFFFFFl
+            val ref = d2b(a) & 0xFFFFFFFFl
+            assert(v == ref, f"$a = $v, $ref")
+          }
+          else storeRaw(rd, FpuFormat.FLOAT){rsp =>
+            val v = rsp.value.toBigInt.toLong & 0xFFFFFFFFl
+            val ref = d2b(a) & 0xFFFFFFFFl
+            assert(v == ref, f"$a = $v, $ref")
+          }
+          flagMatch(0, f"$a")
+        }
+
+
+        def testCvtF32F64Raw(a : Float, ref : Double, flag : Int, rounding : FpuRoundMode.E): Unit ={
+          val rs, rd = Random.nextInt(32)
+          load(rs, a)
+          fpuF2f(rd, rs, Random.nextInt(32), Random.nextInt(32), FpuOpcode.FCVT_X_X, Random.nextInt(3), rounding, FpuFormat.FLOAT)
+          store(rd){v =>
+            assert(d2b(v) == d2b(ref), f"testCvtF32F64Raw $a $ref $rounding")
+          }
+          flagMatch(flag,ref, f"testCvtF32F64Raw $a $ref $rounding")
+        }
+
+        def testCvtF64F32Raw(a : Double, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={
+          val rs, rd = Random.nextInt(32)
+          load(rs, a)
+          fpuF2f(rd, rs, Random.nextInt(32), Random.nextInt(32), FpuOpcode.FCVT_X_X, Random.nextInt(3), rounding, FpuFormat.DOUBLE)
+          storeFloat(rd){v =>
+            assert(d2b(v) == d2b(ref), f"testCvtF64F32Raw $a $ref $rounding")
+          }
+          flagMatch(flag, ref, f"testCvtF64F32Raw $a $ref $rounding")
+        }
+
+
+        def testClassRaw(a : Float) : Unit = {
+          val rd = Random.nextInt(32)
+
+
+          load(rd, a)
+          fclass(rd, FpuFormat.FLOAT){v =>
+            val mantissa = f2b(a) & 0x7FFFFF
+            val exp = (f2b(a) >> 23) & 0xFF
+            val sign = (f2b(a) >> 31) & 0x1
+
+            val refBit = if(a.isInfinite) (if(sign == 0) 7 else 0)
+            else if(a.isNaN) (if((mantissa >> 22) != 0) 9 else 8)
+            else if(exp == 0 && mantissa != 0) (if(sign == 0) 5 else 2)
+            else if(exp == 0 && mantissa == 0) (if(sign == 0) 4 else 3)
+            else if(sign == 0) 6 else 1
+
+            val ref = 1 << refBit
+
+            assert(v == ref, f"fclass $a")
+          }
+        }
+
+
+        def testClassF64Raw(a : Double) : Unit = {
+          val rd = Random.nextInt(32)
+
+
+          load(rd, a)
+          fclass(rd, FpuFormat.DOUBLE){v =>
+            val mantissa = d2b(a) & 0x000FFFFFFFFFFFFFl
+            val exp = (d2b(a) >> 52) & 0x7FF
+            val sign = (d2b(a) >> 63) & 0x1
+
+            val refBit = if(a.isInfinite) (if(sign == 0) 7 else 0)
+            else if(a.isNaN) (if((mantissa >> 51) != 0) 9 else 8)
+            else if(exp == 0 && mantissa != 0) (if(sign == 0) 5 else 2)
+            else if(exp == 0 && mantissa == 0) (if(sign == 0) 4 else 3)
+            else if(sign == 0) 6 else 1
+
+            val ref = 1 << refBit
+
+            assert(v == ref, f"fclass $a")
+          }
+        }
+
+
+
+        def testFmaRaw(a : Float, b : Float, c : Float): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2, rs3 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+          load(rs2, b)
+          load(rs3, c)
+
+          fma(rd,rs1,rs2,rs3, FpuRoundMode.RNE, FpuFormat.FLOAT)
+          storeFloat(rd){v =>
+            val ref = a.toDouble * b.toDouble + c.toDouble
+            val mul = a.toDouble * b.toDouble
+            if((mul.abs-c.abs)/mul.abs > 0.1)  assert(checkFloat(ref.toFloat, v), f"$a%.20f * $b%.20f + $c%.20f = $v%.20f, $ref%.20f")
+          }
+        }
+
+
+
+        def testFmaF64Raw(a : Double, b : Double, c : Double): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2, rs3 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+          load(rs2, b)
+          load(rs3, c)
+
+          fma(rd,rs1,rs2,rs3, FpuRoundMode.RNE, FpuFormat.DOUBLE)
+          store(rd){v =>
+            val ref = a.toDouble * b.toDouble + c.toDouble
+            val mul = a.toDouble * b.toDouble
+            if((mul.abs-c.abs)/mul.abs > 0.1)  assert(checkDouble(ref, v), f"$a%.20f * $b%.20f + $c%.20f = $v%.20f, $ref%.20f")
+          }
+        }
+
+        def testSqrtF64Exact(a : Double, ref : Double, flag : Int, rounding : FpuRoundMode.E): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2, rs3 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+
+          sqrt(rd,rs1,  rounding, FpuFormat.DOUBLE)
+
+          store(rd){v =>
+            assert(d2b(v) == d2b(ref), f"## sqrt${a}   = $v, $ref $rounding, ${d2b(a).toString(16)} ${d2b(ref).toString(16)}")
+          }
+
+          flagMatch(flag, ref, f"## sqrt${a} $ref $rounding")
+        }
+
+        def testSqrtExact(a : Float, ref : Float, flag : Int, rounding : FpuRoundMode.E): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2, rs3 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+
+          sqrt(rd,rs1,  rounding, FpuFormat.FLOAT)
+
+          storeFloat(rd){v =>
+            assert(d2b(v) == d2b(ref), f"## sqrt${a}   = $v, $ref $rounding, ${f2b(a).toString()} ${f2b(ref).toString()}")
+          }
+
+          flagMatch(flag, ref, f"## sqrt${a} $ref $rounding")
+        }
+
+
+        def testF2iExact(a : Float, ref : Int, flag : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={
+          val rs = new RegAllocator()
+          val rs1 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+          f2i(rs1, signed, rounding, FpuFormat.FLOAT){rsp =>
+            if(signed) {
+              val v = rsp.value.toBigInt.toInt
+              var ref2 = ref
+              if(a >= Int.MaxValue) ref2 = Int.MaxValue
+              if(a <= Int.MinValue) ref2 = Int.MinValue
+              if(a.isNaN) ref2 = Int.MaxValue
+              assert(v == (ref2), f" <= f2i($a) = $v, $ref2, $rounding, $flag")
+            } else {
+              val v = rsp.value.toBigInt.toLong & 0xFFFFFFFFl
+              var ref2 = ref.toLong & 0xFFFFFFFFl
+              if(a < 0) ref2 = 0
+              if(a >= 0xFFFFFFFFl) ref2 = 0xFFFFFFFFl
+              if(a.isNaN) ref2 = 0xFFFFFFFFl
+              assert(v == ref2, f" <= f2ui($a) = $v, $ref2, $rounding $flag")
+            }
+          }
+
+          flagMatch(flag, ref, f" f2${if(signed) "" else "u"}i($a) $ref $flag $rounding")
+        }
+
+
+
+        def testF642iExact(a : Double, ref : Int, flag : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={
+          val rs = new RegAllocator()
+          val rs1 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+          f2i(rs1, signed, rounding, FpuFormat.DOUBLE){rsp =>
+            if(signed) {
+              val v = rsp.value.toBigInt.toInt
+              var ref2 = ref
+              if(a >= Int.MaxValue) ref2 = Int.MaxValue
+              if(a <= Int.MinValue) ref2 = Int.MinValue
+              if(a.isNaN) ref2 = Int.MaxValue
+              assert(v == (ref2), f" <= f2i($a) = $v, $ref2, $rounding, $flag")
+            } else {
+              val v = rsp.value.toBigInt.toLong & 0xFFFFFFFFl
+              var ref2 = ref.toLong & 0xFFFFFFFFl
+              if(a < 0) ref2 = 0
+              if(a >= 0xFFFFFFFFl) ref2 = 0xFFFFFFFFl
+              if(a.isNaN) ref2 = 0xFFFFFFFFl
+              assert(v == ref2, f" <= f2ui($a) = $v, $ref2, $rounding $flag")
+            }
+          }
+
+          flagMatch(flag, ref, f" f2${if(signed) "" else "u"}i($a) $ref $flag $rounding")
+        }
+
+
+
+        def testI2fExact(a : Int, ref : Float, f : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={
+          val rs = new RegAllocator()
+          val rd = Random.nextInt(32)
+          i2f(rd, a, signed, rounding, FpuFormat.FLOAT)
+          storeFloat(rd){v =>
+            val aLong = if(signed) a.toLong else a.toLong & 0xFFFFFFFFl
+            assert(f2b(v) == f2b(ref), f"i2f($aLong) = $v, $ref $rounding")
+          }
+
+
+          flagMatch(f, ref, f"i2f($a) = $ref")
+        }
+
+
+
+        def testI2f64Exact(a : Int, ref : Double, f : Int, signed : Boolean, rounding : FpuRoundMode.E): Unit ={
+          val rs = new RegAllocator()
+          val rd = Random.nextInt(32)
+          i2f(rd, a, signed, rounding, FpuFormat.DOUBLE)
+          store(rd){v =>
+            val aLong = if(signed) a.toLong else a.toLong & 0xFFFFFFFFl
+            assert(d2b(v) == d2b(ref), f"i2f($aLong) = $v, $ref $rounding")
+          }
+
+
+          flagMatch(f, ref, f"i2f($a) = $ref")
+        }
+
+
+        def testCmpExact(a : Float, b : Float, ref : Int, flag : Int, arg : Int): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2, rs3 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+          load(rs2, b)
+          cmp(rs1, rs2, arg, FpuFormat.FLOAT){rsp =>
+            val v = rsp.value.toBigInt.toInt
+            assert(v === ref, f"cmp($a, $b, $arg) = $v, $ref")
+          }
+          flagMatch(flag,f"$a < $b $ref $flag ${f2b(a).toHexString} ${f2b(b).toHexString}")
+        }
+        def testLeRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 0)
+        def testEqRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 2)
+        def testLtRaw(a : Float, b : Float, ref : Int, flag : Int) = testCmpExact(a,b,ref,flag, 1)
+
+
+        def testCmpF64Exact(a : Double, b : Double, ref : Int, flag : Int, arg : Int): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2, rs3 = rs.allocate()
+          val rd = Random.nextInt(32)
+          load(rs1, a)
+          load(rs2, b)
+          cmp(rs1, rs2, arg, FpuFormat.DOUBLE){rsp =>
+            val v = rsp.value.toBigInt.toInt
+            assert(v === ref, f"cmp($a, $b, $arg) = $v, $ref")
+          }
+          flagMatch(flag,f"$a < $b $ref $flag ${d2b(a)} ${d2b(b)}")
+        }
+        def testLeF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a,b,ref,flag, 0)
+        def testEqF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a,b,ref,flag, 2)
+        def testLtF64Raw(a : Double, b : Double, ref : Int, flag : Int) = testCmpF64Exact(a,b,ref,flag, 1)
+
+//        def testFmv_x_w(a : Float): Unit ={
+//          val rs = new RegAllocator()
+//          val rs1, rs2, rs3 = rs.allocate()
+//          val rd = Random.nextInt(32)
+//          load(rs1, a)tes
+//          fmv_x_w(rs1){rsp =>
+//            val ref = f2b(a).toLong & 0xFFFFFFFFl
+//            val v = rsp.value.toBigInt
+//            println(f"fmv_x_w $a = $v, $ref")
+//            assert(v === ref)
+//          }
+//        }
+
+//        def testFmv_w_x(a : Int): Unit ={
+//          val rs = new RegAllocator()
+//          val rs1, rs2, rs3 = rs.allocate()
+//          val rd = Random.nextInt(32)
+//          fmv_w_x(rd, a)
+//          storeFloat(rd){v =>
+//            val ref = b2f(a)
+//            println(f"fmv_w_x $a = $v, $ref")
+//            assert(v === ref)
+//          }
+//        }
+
+
+
+        def testMinMaxExact(a : Float, b : Float, arg : Int): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2 = rs.allocate()
+          val rd = Random.nextInt(32)
+          val ref = (a,b) match {
+            case _ if a.isNaN && b.isNaN => b2f(0x7FC00000)
+            case _ if a.isNaN => b
+            case _ if b.isNaN => a
+            case _ => if(arg == 0) Math.min(a,b) else Math.max(a,b)
+          }
+          val flag = (a,b) match {
+            case _ if a.isNaN && ((f2b(a) >> 22 ) & 1) == 0 => 16
+            case _ if b.isNaN && ((f2b(b) >> 22 ) & 1) == 0 => 16
+            case _ => 0
+          }
+          load(rs1, a)
+          load(rs2, b)
+
+          minMax(rd,rs1,rs2, arg, FpuFormat.FLOAT)
+          storeFloat(rd){v =>
+            assert(f2b(ref) ==  f2b(v), f"minMax($a $b $arg) = $v, $ref")
+          }
+          flagMatch(flag, f"minmax($a $b $arg)")
+        }
+
+        def testMinExact(a : Float, b : Float) : Unit = testMinMaxExact(a,b,0)
+        def testMaxExact(a : Float, b : Float) : Unit = testMinMaxExact(a,b,1)
+
+
+        def testMinMaxF64Exact(a : Double, b : Double, arg : Int): Unit ={
+          val rs = new RegAllocator()
+          val rs1, rs2 = rs.allocate()
+          val rd = Random.nextInt(32)
+          val ref = (a,b) match {
+            case _ if a.isNaN && b.isNaN => b2d(0x7ff8000000000000l)
+            case _ if a.isNaN => b
+            case _ if b.isNaN => a
+            case _ => if(arg == 0) Math.min(a,b) else Math.max(a,b)
+          }
+          val flag = (a,b) match {
+            case _ if a.isNaN && ((d2b(a) >> 51 ) & 1) == 0 => 16
+            case _ if b.isNaN && ((d2b(b) >> 51 ) & 1) == 0 => 16
+            case _ => 0
+          }
+          load(rs1, a)
+          load(rs2, b)
+
+          minMax(rd,rs1,rs2, arg, FpuFormat.DOUBLE)
+          store(rd){v =>
+            assert(d2b(ref) ==  d2b(v), f"minMax($a $b $arg) = $v, $ref")
+          }
+          flagMatch(flag, f"minmax($a $b $arg)")
+        }
+
+        def testMinF64Exact(a : Double, b : Double) : Unit = testMinMaxF64Exact(a,b,0)
+        def testMaxF64Exact(a : Double, b : Double) : Unit = testMinMaxF64Exact(a,b,1)
+
+
+        def testSgnjRaw(a : Float, b : Float): Unit ={
+          var ref = b2f((f2b(a) & ~0x80000000) | f2b(b) & 0x80000000)
+          if(a.isNaN) ref = a
+          testBinaryOp(sgnj,a,b,ref,0, null,"sgnj")
+        }
+        def testSgnjnRaw(a : Float, b : Float): Unit ={
+          var ref = b2f((f2b(a) & ~0x80000000) | ((f2b(b) & 0x80000000) ^ 0x80000000))
+          if(a.isNaN) ref = a
+          testBinaryOp(sgnjn,a,b,ref,0, null,"sgnjn")
+        }
+        def testSgnjxRaw(a : Float, b : Float): Unit ={
+          var ref = b2f(f2b(a) ^ (f2b(b) & 0x80000000))
+          if(a.isNaN) ref = a
+          testBinaryOp(sgnjx,a,b,ref,0, null,"sgnjx")
+        }
+
+        val f64SignMask = 1l << 63
+        def testSgnjF64Raw(a : Double, b : Double): Unit ={
+          var ref = b2d((d2b(a).toLong & ~f64SignMask) | d2b(b).toLong & f64SignMask)
+          if(a.isNaN) ref = a
+          testBinaryOpF64(sgnj,a,b,ref,0, null,"sgnj")
+        }
+        def testSgnjnF64Raw(a : Double, b : Double): Unit ={
+          var ref = b2d((d2b(a).toLong & ~f64SignMask) | ((d2b(b).toLong & f64SignMask) ^ f64SignMask))
+          if(a.isNaN) ref = a
+          testBinaryOpF64(sgnjn,a,b,ref,0, null,"sgnjn")
+        }
+        def testSgnjxF64Raw(a : Double, b : Double): Unit ={
+          var ref = b2d(d2b(a).toLong ^ (d2b(b).toLong & f64SignMask))
+          if(a.isNaN) ref = a
+          testBinaryOpF64(sgnjx,a,b,ref,0, null,"sgnjx")
+        }
+
+
+        def withMinus(that : Seq[Float]) = that.flatMap(f => List(f, -f))
+        val fZeros = withMinus(List(0.0f))
+        val fSubnormals = withMinus(List(b2f(0x00000000+1), b2f(0x00000000+2), b2f(0x00006800), b2f(0x00800000-2), b2f(0x00800000-1)))
+        val fExpSmall = withMinus(List(b2f(0x00800000), b2f(0x00800000+1), b2f(0x00800000 + 2)))
+        val fExpNormal = withMinus(List(b2f(0x3f800000-2), b2f(0x3f800000-1), b2f(0x3f800000), b2f(0x3f800000+1), b2f(0x3f800000+2)))
+        val fExpBig = withMinus(List(b2f(0x7f7fffff-2), b2f(0x7f7fffff-1), b2f(0x7f7fffff)))
+        val fInfinity = withMinus(List(Float.PositiveInfinity))
+        val fNan = List(Float.NaN, b2f(0x7f820000), b2f(0x7fc00000))
+        val fAll = fZeros ++ fSubnormals ++ fExpSmall ++ fExpNormal ++ fExpBig ++ fInfinity ++ fNan
+
+        val iSmall = (0 to 20)
+        val iBigUnsigned = (0 to 20).map(e => 0xFFFFFFFF - e)
+        val iBigSigned = (0 to 20).map(e => 0x7FFFFFFF - e) ++ (0 to 20).map(e => 0x80000000 + e)
+        val iUnsigned = iSmall ++ iBigUnsigned
+        val iSigned = iSmall ++ iSmall.map(-_) ++ iBigSigned
+
+
+        val roundingModes = FpuRoundMode.elements
+        def foreachRounding(body : FpuRoundMode.E => Unit): Unit ={
+          for(rounding <- roundingModes){
+            body(rounding)
+          }
+        }
+
+        def testFmaF32() : Unit = {
+          testFmaRaw(randomFloat(), randomFloat(), randomFloat())
+          flagClear()
+        }
+
+
+        def testFmaF64() : Unit = {
+          testFmaF64Raw(randomDouble(), randomDouble(), randomDouble())
+          flagClear()
+        }
+
+        def testLeF32() : Unit = {
+          val (a,b,i,f) = f32.le.RAW.f32_f32_i32
+          testLeRaw(a,b,i, f)
+        }
+        def testLtF32() : Unit = {
+          val (a,b,i,f) = f32.lt.RAW.f32_f32_i32
+          testLtRaw(a,b,i, f)
+        }
+
+        def testEqF32() : Unit = {
+          val (a,b,i,f) = f32.eq.RAW.f32_f32_i32
+          testEqRaw(a,b,i, f)
+        }
+
+        def testLeF64() : Unit = {
+          val (a,b,i,f) = f64.le.RAW.f64_f64_i32
+          testLeF64Raw(a,b,i, f)
+        }
+        def testLtF64() : Unit = {
+          val (a,b,i,f) = f64.lt.RAW.f64_f64_i32
+          testLtF64Raw(a,b,i, f)
+        }
+
+        def testEqF64() : Unit = {
+          val (a,b,i,f) = f64.eq.RAW.f64_f64_i32
+          testEqF64Raw(a,b,i, f)
+        }
+
+
+        def testF2uiF32() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,f) = f32.f2ui(rounding).f32_i32
+          testF2iExact(a,b, f, false, rounding)
+        }
+
+        def testF2iF32() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,f) = f32.f2i(rounding).f32_i32
+          testF2iExact(a,b, f, true, rounding)
+        }
+
+        def testF2uiF64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,f) = f64.f2ui(rounding).f64_i32
+          testF642iExact(a,b, f, false, rounding)
+        }
+
+        def testF2iF64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,f) = f64.f2i(rounding).f64_i32
+          testF642iExact(a,b, f, true, rounding)
+        }
+
+
+        def testDiv() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,r,f) = f32.div(rounding).f32_f32_f32
+          testBinaryOp(div, a, b, r, f, rounding, "div")
+        }
+
+        def testSqrt() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,r,f) = f32.sqrt(rounding).f32_f32
+          testSqrtExact(a, r, f, rounding)
+          flagClear()
+        }
+
+        def testSgnjF32() : Unit = {
+          testSgnjRaw(b2f(Random.nextInt()), b2f(Random.nextInt()))
+          testSgnjnRaw(b2f(Random.nextInt()), b2f(Random.nextInt()))
+          testSgnjxRaw(b2f(Random.nextInt()), b2f(Random.nextInt()))
+          val (a,b,r,f) = f32.sgnj.RAW.f32_f32_i32
+          testSgnjRaw(a, b)
+          testSgnjnRaw(a, b)
+          testSgnjxRaw(a, b)
+        }
+
+        def testDivF64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,r,f) = f64.div(rounding).f64_f64_f64
+         // testDivF64Exact(a, b, r, f, rounding)
+          testBinaryOpF64(div, a, b, r, f,rounding, "div")
+          flagClear()
+        }
+
+        def testSqrtF64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,r,f) = f64.sqrt(rounding).f64_f64
+          testSqrtF64Exact(a, r, f, rounding)
+          flagClear()
+        }
+
+        def testSgnjF64() : Unit = {
+          testSgnjF64Raw(b2d(Random.nextLong()), b2d(Random.nextLong()))
+          testSgnjnF64Raw(b2d(Random.nextLong()), b2d(Random.nextLong()))
+          testSgnjxF64Raw(b2d(Random.nextLong()), b2d(Random.nextLong()))
+          val (a,b,r,f) = f64.sgnj.RAW.f64_f64_i32
+          testSgnjF64Raw(a, b)
+          testSgnjnF64Raw(a, b)
+          testSgnjxF64Raw(a, b)
+        }
+
+
+        def testTransferF32() : Unit = {
+          val (a,b,r,f) = f32.transfer.RAW.f32_f32_i32
+          testTransferF32Raw(a, Random.nextBoolean(), Random.nextBoolean())
+        }
+
+        def testTransferF64() : Unit = {
+          val (a,b,r,f) = f64.transfer.RAW.f64_f64_i32
+          testTransferF64Raw(a)
+        }
+
+        def testTransferF64F32() : Unit = {
+          val (a,b,r,f) = f64.f32.RAW.f64_f64_i32
+          testTransferF64F32Raw(a, Random.nextBoolean())
+        }
+        def testTransferF32F64() : Unit = {
+          val (a,b,r,f) = f32.f64.RAW.f32_f32_i32
+          testTransferF32F64Raw(a, Random.nextBoolean())
+        }
+
+        def testCvtF32F64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,r,f) = f32.cvt64(rounding).f32_f64_i32
+          testCvtF32F64Raw(a, r, f, rounding)
+        }
+        def testCvtF64F32() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,r,f) = f64.cvt32(rounding).f64_f32_i32
+          testCvtF64F32Raw(a, r, f, rounding)
+        }
+
+        def testClassF32() : Unit = {
+          val (a,b,r,f) = f32.fclass.RAW.f32_f32_i32
+          testClassRaw(a)
+        }
+
+        def testMinF32() : Unit = {
+          val (a,b,r,f) = f32.min.RAW.f32_f32_f32
+          testMinExact(a,b)
+        }
+        def testMaxF32() : Unit = {
+          val (a,b,r,f) = f32.max.RAW.f32_f32_f32
+          testMaxExact(a,b)
+        }
+
+        def testClassF64() : Unit = {
+          val (a,b,r,f) = f64.fclass.RAW.f64_f64_i32
+          testClassF64Raw(a)
+        }
+
+        def testMinF64() : Unit = {
+          val (a,b,r,f) = f64.min.RAW.f64_f64_f64
+          testMinF64Exact(a,b)
+        }
+        def testMaxF64() : Unit = {
+          val (a,b,r,f) = f64.max.RAW.f64_f64_f64
+          testMaxF64Exact(a,b)
+        }
+
+
+        def testUI2f32() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,f) = f32.i2f(rounding).i32_f32
+          testI2fExact(a,b,f, true, rounding)
+        }
+
+        def testI2f32() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,f) = f32.ui2f(rounding).i32_f32
+          testI2fExact(a,b,f, false, rounding)
+        }
+
+        def testUI2f64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,f) = f64.i2f(rounding).i32_f64
+          testI2f64Exact(a,b,f, true, rounding)
+        }
+
+        def testI2f64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,f) = f64.ui2f(rounding).i32_f64
+          testI2f64Exact(a,b,f, false, rounding)
+        }
+
+        def testMulF32() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,c,f) = f32.mul(rounding).f32_f32_f32
+          testBinaryOp(mul,a,b,c,f, rounding,"mul")
+        }
+
+        def testAddF32() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,c,f) = f32.add(rounding).f32_f32_f32
+          testBinaryOp(add,a,b,c,f, rounding,"add")
+        }
+
+        def testSubF32() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,c,f) = f32.sub(rounding).f32_f32_f32
+          testBinaryOp(sub,a,b,c,f, rounding,"sub")
+        }
+
+
+        def testMulF64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,c,f) = f64.mul(rounding).f64_f64_f64
+          testBinaryOpF64(mul,a,b,c,f, rounding,"mul")
+        }
+
+        def testAddF64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,c,f) = f64.add(rounding).f64_f64_f64
+          testBinaryOpF64(add,a,b,c,f, rounding,"add")
+        }
+
+        def testSubF64() : Unit = {
+          val rounding = FpuRoundMode.elements.randomPick()
+          val (a,b,c,f) = f64.sub(rounding).f64_f64_f64
+          testBinaryOpF64(sub,a,b,c,f, rounding,"sub")
+        }
+
+
+        val f32Tests = List[() => Unit](testSubF32, testAddF32, testMulF32, testI2f32, testUI2f32, testMinF32, testMaxF32, testSgnjF32, testTransferF32, testDiv, testSqrt, testF2iF32, testF2uiF32, testLeF32, testEqF32, testLtF32, testClassF32, testFmaF32)
+        val f64Tests = List[() => Unit](testSubF64, testAddF64, testMulF64, testI2f64, testUI2f64, testMinF64, testMaxF64, testSgnjF64, testTransferF64, testDiv, testSqrt, testF2iF64, testF2uiF64, testLeF64, testEqF64, testLtF64, testClassF64, testFmaF64, testCvtF32F64, testCvtF64F32)
+
+        var fxxTests = f32Tests
+        if(p.withDouble) fxxTests ++= f64Tests
+
+//5071920 5225560
+//        for(v <- List(-1.17549435082e-38f, 1.17549435082e-38f);
+//            rounding <- FpuRoundMode.elements) {
+//          for (i <- 0 until 2048) {
+//            val b = d2b(v)// 0x0010000000000000l //d2b(1.17549435082e-38)
+//            val s = (b - (i.toLong << 21)).toLong
+//            val d = b2d(s)
+////            val rounding = FpuRoundMode.RNE
+//            testCvtF64F32Raw(d, Clib.math.d2f(d, rounding.position), Clib.math.d2fFlag(d, rounding.position), rounding)
+//          }
+//        }
+//
+//
+//        testCvtF64F32Raw(-1.1754943508051483E-38, -1.17549435E-38f, 1, FpuRoundMode.RNE)
+//        testCvtF64F32Raw( 1.1754943157898258E-38, 1.17549435E-38f , 3, FpuRoundMode.RMM)
+//        testCvtF64F32Raw( 1.1754942807573643E-38, 1.17549435E-38f , 3, FpuRoundMode.RMM)
+//        testCvtF64F32Raw(-1.1754943508051483E-38, -1.17549435E-38f, 1, FpuRoundMode.RMM)
+
+        //-1.1754943508051483E-38 -1.17549435E-38 1 RNE @ 592770
+        // 1.1754943157898258E-38 1.17549435E-38 3 RMM  @ 2697440
+        // 1.1754942807573643E-38 1.17549435E-38 3 RMM
+//        for(_ <- 0 until 1000000) testCvtF64F32() // 1 did not equal 3 Flag missmatch dut=1 ref=3 testCvtF64F32Raw 1.1754942807573643E-38 1.17549435E-38 RMM
+//        println("FCVT_D_S done")
+
+//        testBinaryOpF64(div, -2.2250738564511294E-308, 4.294967296003891E9,  -5.180654E-318, 1, FpuRoundMode.RDN,"div") // ??? wtf
+
+//        testBinaryOp(add,b2f(0x7F800000),b2f(0x1FD << 23),b2f(0x7F800000),0, FpuRoundMode.RNE,"add")
+
+
+
+//        testBinaryOp(mul,1.1753509E-38f, 1.0001221f ,1.17549435E-38f,1, FpuRoundMode.RNE,"mul")
+//
+//        for(i <- 0 until 10000000){
+//          val rounding = FpuRoundMode.elements.randomPick()
+//          val (a,b,c,f) = f32.mul(rounding).f32_f32_f32
+//          testBinaryOp(mul,a,b,c,f, rounding,"mul")
+//        }
+//
+//        testBinaryOpF64(mul,2.781342323134002E-309, 7.999999999999999, 2.2250738585072014E-308, 3, FpuRoundMode.RNE,"mul")
+////        for(i <- 0 until 10000000){
+////          val rounding = FpuRoundMode.RNE
+////          val (a,b,c,f) = f64.mul(rounding).f64_f64_f64
+////          testBinaryOpF64(mul,a,b,c,f, rounding,"mul")
+////        }
+//        for(_ <- 0 until 100000000) testMulF64()
+//        println("f64 Mul done")
+//
+//        for(_ <- 0 until 10000) testDivF64()
+//        println("f64 div done")
+//
+//
+//        for(_ <- 0 until 10000) testDiv()
+//        println("f32 div done")
+//
+//        for(_ <- 0 until 10000) testAddF32()
+//        for(_ <- 0 until 10000) testSubF32()
+//
+//        println("Add done")
+//
+//
+//        for(_ <- 0 until 10000) testSqrt()
+//        println("f32 sqrt done")
+
+
+
+
+
+
+        //TODO test boxing
+        //TODO double <-> simple convertions
+        if(p.withDouble) {
+
+          testSqrtF64Exact(1.25*1.25, 1.25, 0, FpuRoundMode.RNE)
+          testSqrtF64Exact(1.5*1.5, 1.5, 0, FpuRoundMode.RNE)
+
+          for(_ <- 0 until 10000) testSqrtF64()
+          println("f64 sqrt done")
+
+//          testDivF64Exact(1.0, 8.0, 0.125, 0, FpuRoundMode.RNE)
+//          testDivF64Exact(4.0, 8.0, 0.5, 0, FpuRoundMode.RNE)
+//          testDivF64Exact(8.0, 8.0, 1.0, 0, FpuRoundMode.RNE)
+//          testDivF64Exact(1.5, 2.0, 0.75, 0, FpuRoundMode.RNE)
+//          testDivF64Exact(1.875, 1.5, 1.25, 0, FpuRoundMode.RNE)
+
+          for(_ <- 0 until 10000) testDivF64()
+          println("f64 div done")
+
+          for(_ <- 0 until 10000) testSgnjF64()
+          println("f64 sgnj done")
+
+          for(_ <- 0 until 10000) testSgnjF32()
+          println("f32 sgnj done")
+
+          //380000000001ffef 5fffffffffff9ff 8000000000100000
+//          testBinaryOpF64(mul,-5.877471754282472E-39, 8.814425663400984E-280, -5.180654E-318 ,1, FpuRoundMode.RMM,"mul")
+//          5.877471754282472E-39 8.814425663400984E-280 -5.180654E-318 RMM
+
+          for(_ <- 0 until 10000) testCvtF64F32() // 1 did not equal 3 Flag missmatch dut=1 ref=3 testCvtF64F32Raw 1.1754942807573643E-38 1.17549435E-38 RMM
+          println("FCVT_D_S done")
+          for(_ <- 0 until 10000) testCvtF32F64()
+          println("FCVT_S_D done")
+
+          for(_ <- 0 until 10000) testF2iF64()
+          println("f64 f2i done")
+          for(_ <- 0 until 10000) testF2uiF64()
+          println("f64 f2ui done")
+
+
+
+
+
+          for(_ <- 0 until 10000) testMinF64()
+          for(_ <- 0 until 10000) testMaxF64()
+          println("f64 minMax done")
+
+
+
+          for(i <- 0 until 1000) testFmaF64()
+          flagClear()
+          println("f64 fma done") //TODO
+
+
+          for(_ <- 0 until 10000) testLeF64()
+          for(_ <- 0 until 10000) testLtF64()
+          for(_ <- 0 until 10000) testEqF64()
+          println("f64 Cmp done")
+
+
+          for(_ <- 0 until 10000) testClassF64()
+          println("f64 class done")
+//
+
+
+
+
+
+
+
+
+          for(_ <- 0 until 10000) testAddF64()
+          for(_ <- 0 until 10000) testSubF64()
+          println("f64 Add done")
+
+
+          //          testI2f64Exact(0x7FFFFFF5, 0x7FFFFFF5, 0, true, FpuRoundMode.RNE)
+          for(_ <- 0 until 10000) testUI2f64()
+          for(_ <- 0 until 10000) testI2f64()
+          println("f64 i2f done")
+
+
+
+//          testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ)
+//          testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ)
+//          testF2iExact(2.5f,2, 1, false, FpuRoundMode.RTZ)
+
+
+
+
+          testBinaryOpF64(mul,1.0, 1.0, 1.0,0 , FpuRoundMode.RNE,"mul")
+          testBinaryOpF64(mul,1.0, 2.0, 2.0,0 , FpuRoundMode.RNE,"mul")
+          testBinaryOpF64(mul,2.5, 2.0, 5.0,0 , FpuRoundMode.RNE,"mul")
+
+          for(_ <- 0 until 10000) testMulF64()
+          println("f64 Mul done")
+
+          testTransferF64Raw(1.0)
+          testTransferF64Raw(2.0)
+          testTransferF64Raw(2.5)
+          testTransferF64Raw(6.97949770801e-39)
+          testTransferF64Raw(8.72437213501e-40)
+          testTransferF64Raw(5.6E-45)
+
+          testTransferF32F64Raw(b2f(0xFFFF1234), false)
+          testTransferF64F32Raw(b2d(0xFFF123498765463l << 4), false)
+          testTransferF32F64Raw(b2f(0xFFFF1234), true)
+          testTransferF64F32Raw(b2d(0xFFF123498765463l << 4), true)
+
+          for (_ <- 0 until 10000) testTransferF64()
+          println("f64 load/store/rf transfer done")
+
+          for (_ <- 0 until 10000) testTransferF64F32()
+          println("f64 -> f32 load/store/rf transfer done")
+
+          for (_ <- 0 until 10000) testTransferF32F64()
+          println("f32 -> f64 load/store/rf transfer done")
+
+        }
+
+        for(_ <- 0 until 10000) testTransferF32()
+        println("f32 load/store/rf transfer done")
+
+        for(_ <- 0 until 10000) testMulF32()
+        println("Mul done")
+
+
+        for(_ <- 0 until 10000) testUI2f32()
+        for(_ <- 0 until 10000) testI2f32()
+        println("i2f done")
+
+
+        testF2iExact(1.0f,1, 0, false, FpuRoundMode.RTZ)
+        testF2iExact(2.0f,2, 0, false, FpuRoundMode.RTZ)
+        testF2iExact(2.5f,2, 1, false, FpuRoundMode.RTZ)
+
+
+
+
+
+        for(_ <- 0 until 10000) testF2uiF32()
+        for(_ <- 0 until 10000) testF2iF32()
+        println("f2i done")
+
+
+
+//        waitUntil(cmdQueue.isEmpty)
+//        dut.clockDomain.waitSampling(1000)
+//        simSuccess()
+
+
+
+        for(i <- 0 until 1000) testFmaF32()
+        flagClear()
+        println("fma done") //TODO
+
+
+        testF2iExact(-2.14748365E9f, -2147483648, 0, true, FpuRoundMode.RDN)
+
+        testEqRaw(Float.PositiveInfinity,Float.PositiveInfinity,1, 0)
+        testEqRaw(0f, 0f,1, 0)
+
+        for(_ <- 0 until 10000) testLeF32()
+        for(_ <- 0 until 10000) testLtF32()
+        for(_ <- 0 until 10000) testEqF32()
+        println("Cmp done")
+
+
+        for(_ <- 0 until 10000) testDiv()
+        println("f32 div done")
+
+        for(_ <- 0 until 10000) testSqrt()
+        println("f32 sqrt done")
+
+        for(_ <- 0 until 10000) testSgnjF32()
+        println("f32 sgnj done")
+
+
+        for(_ <- 0 until 10000) testClassF32()
+        println("f32 class done")
+
+
+        for(_ <- 0 until 10000) testMinF32()
+        for(_ <- 0 until 10000) testMaxF32()
+        println("minMax done")
+
+
+
+        testBinaryOp(mul,1.469368E-39f, 7.9999995f, 1.17549435E-38f,3, FpuRoundMode.RUP,"mul")
+        testBinaryOp(mul,1.1753509E-38f, 1.0001221f, 1.17549435E-38f ,1, FpuRoundMode.RUP,"mul")
+        testBinaryOp(mul, 1.1754942E-38f, -1.0000001f, -1.17549435E-38f,1, FpuRoundMode.RNE,"mul")
+        testBinaryOp(mul, 1.1754942E-38f, -1.0000001f, -1.17549435E-38f,1, FpuRoundMode.RDN,"mul")
+        testBinaryOp(mul, 1.1754942E-38f, -1.0000001f, -1.17549435E-38f,1, FpuRoundMode.RMM,"mul")
+
+        testBinaryOp(mul, 1.1754945E-38f, 0.9999998f, 1.17549435E-38f, 3, FpuRoundMode.RUP, "mul")
+        testBinaryOp(mul, 1.1754945E-38f, -0.9999998f, -1.17549435E-38f, 3, FpuRoundMode.RDN, "mul")
+        testBinaryOp(mul, 1.1754946E-38f, 0.9999997f, 1.17549435E-38f, 3, FpuRoundMode.RUP, "mul")
+        testBinaryOp(mul, 1.1754946E-38f, -0.9999997f, -1.17549435E-38f, 3, FpuRoundMode.RDN, "mul")
+        testBinaryOp(mul, 1.1754949E-38f, 0.99999946f, 1.17549435E-38f, 3, FpuRoundMode.RUP, "mul")
+        testBinaryOp(mul, 1.1754949E-38f, -0.99999946f, -1.17549435E-38f, 3, FpuRoundMode.RDN, "mul")
+        testBinaryOp(mul, 1.1754955E-38f, 0.999999f, 1.17549435E-38f, 3, FpuRoundMode.RUP, "mul")
+
+
+
+
+
+
+        for(_ <- 0 until 10000) testAddF32()
+        for(_ <- 0 until 10000) testSubF32()
+
+        println("Add done")
+
+
+
+
+
+
+
+
+
+
+//        waitUntil(cmdQueue.isEmpty)
+//        dut.clockDomain.waitSampling(1000)
+//        simSuccess()
+
+        for(i <- 0 until 100000) fxxTests.randomPick()()
+        waitUntil(cpu.rspQueue.isEmpty)
+      }
+
+
+
+
+      stim.foreach(_.join())
+      dut.clockDomain.waitSampling(100)
+    }
+  }
+}
+
+
+//object Clib {
+//  val java_home = System.getProperty("java.home")
+//  assert(java_home != "" && java_home != null, "JAVA_HOME need to be set")
+//  val jdk = java_home.replace("/jre","").replace("\\jre","")
+//  val jdkIncludes = jdk + "/include"
+//  val flags   = List("-fPIC", "-m64", "-shared", "-Wno-attributes") //-Wl,--whole-archive
+//  val os = new File("/media/data/open/SaxonSoc/berkeley-softfloat-3/build/Linux-x86_64-GCC").listFiles().map(_.getAbsolutePath).filter(_.toString.endsWith(".o"))
+//  val cmd = s"gcc -I/media/data/open/SaxonSoc/berkeley-softfloat-3/source/include -I$jdkIncludes  -I$jdkIncludes/linux ${flags.mkString(" ")} -o src/test/cpp/fpu/math/fpu_math.so src/test/cpp/fpu/math/fpu_math.c src/test/cpp/fpu/math/softfloat.a" // src/test/cpp/fpu/math/softfloat.a
+//  DoCmd.doCmd(cmd)
+//  val math = new FpuMath
+//}
+// cd /media/data/open/SaxonSoc/testFloatBuild/berkeley-softfloat-3/build/Linux-x86_64-GCC
+// make clean && SPECIALIZE_TYPE=RISCV make -j$(nproc) && cp softfloat.a /media/data/open/SaxonSoc/artyA7SmpUpdate/SaxonSoc/ext/VexRiscv/src/test/cpp/fpu/math
+//object FpuCompileSo extends App{
+//
+////  val b2f = lang.Float.intBitsToFloat(_)
+////  for(e <- FpuRoundMode.elements) {
+////    println(e)
+////    for (i <- -2 until 50) println(i + " => " + Clib.math.addF32(b2f(0x7f000000), b2f(0x7f000000 + i), e.position))
+////    println("")
+////  }
+//  //1 did not equal 3 Flag missmatch dut=1 ref=3 ## mul 0.9994812 -1.1754988E-38 -1.174889E-38 RMM
+//  //  println(Clib.math.mulF32(0.9994812f, -1.1754988E-38f, FpuRoundMode.RMM.position))
+////  miaou ffffffff 7fffffe0 7f
+////  miaou 0 3ffffff0 70 = 0
+//
+//  val b2f = lang.Float.intBitsToFloat(_)
+//  val b2d = lang.Double.longBitsToDouble(_)
+//  val f2b = lang.Float.floatToRawIntBits(_)
+//  val d2bOffset = BigInt("10000000000000000",16)
+//  def d2b(that : Double) = {
+//    val l = lang.Double.doubleToRawLongBits(that)
+//    var a = BigInt(l)
+//    if(l < 0) {
+//      a = d2bOffset + a
+//    }
+//    a
+//  }
+//  val builder =new  StringBuilder()
+//  for(i <- 0 until 256){
+////    builder ++= (Clib.math.mulF32(1.17548538251e-38f, b2f(f2b(1.0f)+i),0)).toString + "\n"
+//    val b = d2b(1.17549435082e-38)
+//    val s = (b-(i.toLong << 25)).toLong
+//    val d = b2d(s)
+//    builder ++= f"$b $s $d => "
+//    builder ++= f"${d2b(d)}%x   " + (Clib.math.d2fFlag(d,0)).toString + " " + d + " => " + (Clib.math.d2f(d,FpuRoundMode.RMM.position)).toString + "\n"
+//  }
+//
+//  Thread.sleep(400)
+//  println(builder.toString)
+//    println(Clib.math.mulF32( 1.1753509E-38f, 1.0001221f, FpuRoundMode.RUP.position))
+//    println(Clib.math.mulF32( 1.1754945E-38f, 0.9999998f, FpuRoundMode.RUP.position))
+//  testBinaryOp(mul, 1.1753509E-38f, 1.0001221f, 1.17549435E-38f ,1, FpuRoundMode.RUP,"mul")
+//  testBinaryOp(mul, 1.1754945E-38f, 0.9999998f, 1.17549435E-38f, 3, FpuRoundMode.RUP, "mul")
+//  miaou ffffffff 7fffffe0 7f
+//  miaou 0 3ffffff0 70 = 0
+//  miaou ffffffff 7fffff7e 7f
+//  miaou 1 3fffffbf 3f = 1
+
+//  println(Clib.math.mulF32( 1.1753509E-38f, 1.0001221f, FpuRoundMode.RUP.position))
+//  println(Clib.math.mulF32( 1.469368E-39f, 7.9999995f, FpuRoundMode.RUP.position))
+//  println(Clib.math.mulF32( 1.40129846432e-45f, 7.9999995f, FpuRoundMode.RUP.position))
+//  println(Clib.math.mulF32( 2.93873587706e-39f, 7.9999995f, FpuRoundMode.RUP.position))
+//  println(Clib.math.mulF32( 1f, 7.9999995f, FpuRoundMode.RUP.position))
+
+
+//  println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RNE.position))
+//  println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RTZ.position))
+//  println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RDN.position))
+//  println(Clib.math.addF32(1.00000011921f, 4.0f, FpuRoundMode.RUP.position))
+//}
+
+class ProcessStream(cmd : String){
+  import sys.process._
+
+  val buf = mutable.Queue[() => String]()
+  val p = Process(cmd).run(new ProcessLogger {
+    override def out(s: => String): Unit = {
+      while(buf.size > 10000) Thread.sleep(10)
+      buf.enqueue(() => s)
+    }
+    override def err(s: => String): Unit = {}
+    override def buffer[T](f: => T): T = f
+  })
+
+  def kill = p.destroy()
+  def next = {
+    while(buf.isEmpty) { Thread.sleep(10) }
+    buf.dequeue()()
+  }
+}
+
diff --git a/VexRiscv/src/test/scala/vexriscv/ip/fpu/Playground.scala b/VexRiscv/src/test/scala/vexriscv/ip/fpu/Playground.scala
new file mode 100644
index 0000000..a155210
--- /dev/null
+++ b/VexRiscv/src/test/scala/vexriscv/ip/fpu/Playground.scala
@@ -0,0 +1,47 @@
+package vexriscv.ip.fpu
+
+object MiaouDiv extends App{
+  val input = 2.5
+  var output = 1/(input*0.95)
+
+  def y = output
+  def x = input
+
+  for(i <- 0 until 10) {
+    output = 2 * y - x * y * y
+    println(output)
+  }
+
+
+  //output = x*output
+  println(1/input)
+}
+
+object MiaouSqrt extends App{
+  val input = 2.0
+  var output = 1/Math.sqrt(input*0.95)
+  //  def x = output
+  //  def y = input
+
+  def y = output
+  def x = input
+
+  for(i <- 0 until 10) {
+    output = y * (1.5 - x * y * y / 2)
+    println(output)
+  }
+
+  output = x*output
+  println(output)
+  println(s"ref ${Math.sqrt(input)}")
+}
+
+
+object MiaouNan extends App{
+  println(Float.NaN + 3.0f)
+  println(3.0f + Float.NaN )
+  println(0.0f*Float.PositiveInfinity )
+  println(1.0f/0.0f )
+  println(Float.MaxValue -1 )
+  println(Float.PositiveInfinity - Float.PositiveInfinity)
+}
\ No newline at end of file
-- 
cgit v1.2.3