diff --git a/.gitignore b/.gitignore
index 7f1bd0b2cd..e48e8148d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,10 +8,13 @@ vgcore*
 core.*
 core
 *.lo
+*.a
+*.so
 *.la
 **/data
-**/*-runner
 **/runner
+**/*-runner
+**/llvm-build-temp
 tce/Makefile
 Makefile.in
 tce/Makefile.in
@@ -25,6 +28,31 @@ compiletest.*.log
 testsuite/systemtest/bintools/Scheduler/**/program.bc
 testsuite/systemtest_long/bintools/Scheduler/**/program.bc
 testsuite/systemtest_long/bintools/Compiler/**/program.bc
+tce/test/**/*.tpf
+tce/test/**/*.tpef
+tce/test/**/*.bin
+tce/test/applibs/Assembler/AssemblerTest/data/temp.tceasm
+tce/test/applibs/Explorer/DesignSpaceExplorerTest/data/test.dsdb
+tce/test/applibs/Explorer/ExplorerPluginTest/data/test.dsdb
+tce/test/applibs/TraceDB/ExecutionTraceTest/data/
+tce/test/applibs/bem/BEMGeneratorTest/data/complex_bem.bem
+tce/test/applibs/bem/BEMGeneratorTest/data/par_bem.bem
+tce/test/applibs/dsdb/DSDBManagerTest/data/1.adf
+tce/test/applibs/dsdb/DSDBManagerTest/data/1.idf
+tce/test/applibs/dsdb/DSDBManagerTest/dsdb1.ddb
+tce/test/applibs/dsdb/DSDBManagerTest/dsdb2.ddb
+tce/test/applibs/wxToolkit/GUIOptions/GUIOptionsSerializerTest/gui.config
+tce/test/base/bem/BEMSerializerTest/data/
+tce/test/base/idf/IDFSerializerTest/data/new.idf
+tce/test/base/mach/ADFSerializerTest/data/written.mdf
+tce/test/base/osal/OperationSerializerTest/data/written.opp
+tce/test/tools/RelationalDBTest/data/
+tce/test/tools/XMLSerializerTest/data/written.conf
+compiletest.log
+compiletest.error.log
+systemtest
+systemtest_long
+systemtest_longlong
 RE:.*/Makefile.in
 tce/src/*/*/Makefile
 tce/src/*/*/*/Makefile
@@ -39,7 +67,7 @@ tce/*/Makefile
 tce/src/tools/tce_version_string.h
 tce/newlib-1.17.0/tce-llvm
 RE:.*/.deps
-RE:.*/.libs
+**/.libs
 tce/test/Makefile.in
 RE:tce/src/.*/Makefile
 RE:tce/test/.*/Makefile.in
@@ -81,6 +109,7 @@ tce/src/procgen/ProDe/prode
 tce/src/procgen/ProGe/buildicdecoderplugin
 tce/src/procgen/ProGe/generateprocessor
 tce/src/procgen/ProGe/ttaunittester
+tce/src/procgen/MachInfo/machinfo
 tce/a.out
 tce/aclocal.m4
 tce/autom4te.cache/
diff --git a/tce/INSTALL b/tce/INSTALL
index f1324f8a53..46bee91a8f 100644
--- a/tce/INSTALL
+++ b/tce/INSTALL
@@ -1,3 +1,14 @@
+Quick start
+===========
+
+Quick installation instructions are included for several operating systems.  If
+you're running a Debian-based Linux system (e.g. Ubuntu), follow the
+instructions in INSTALL.Debian.  Similarly, for RedHat-related distributions
+(e.g. CentOS), follow the instructions in INSTALL.RedHat.
+
+The remainder of this document goes into more detail on the dependencies,
+versions, and installation procedure for a Unix-style operating system.
+
 Operating system
 ================
 
diff --git a/tce/newlib-1.17.0/newlib/libc/include/machine/ieeefp.h b/tce/newlib-1.17.0/newlib/libc/include/machine/ieeefp.h
index 0760896b84..d46ba26bc6 100644
--- a/tce/newlib-1.17.0/newlib/libc/include/machine/ieeefp.h
+++ b/tce/newlib-1.17.0/newlib/libc/include/machine/ieeefp.h
@@ -324,7 +324,6 @@
 
 #ifdef __TCE__
 #define __IEEE_BIG_ENDIAN
-#define _DOUBLE_IS_32BITS
 /* 
    This define is used but never defined anywhere.. maybe it's not needed at all
    #define NO_LONG_DOUBLE 
diff --git a/tce/opset/base/base.cc b/tce/opset/base/base.cc
index d57d8a439f..4f69a414a8 100644
--- a/tce/opset/base/base.cc
+++ b/tce/opset/base/base.cc
@@ -493,26 +493,31 @@ END_TRIGGER;
 END_OPERATION(NOT)
 
 //////////////////////////////////////////////////////////////////////////////
-// NEQF - arithmetic negation, floating-point
+// NEGF - arithmetic negation, single-precision floating-point
 //////////////////////////////////////////////////////////////////////////////
 OPERATION(NEGF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE) {
-        FloatWord in = FLT(1);
-        in = -in;
-        IO(2) = in;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE) {
-        DoubleWord in = DBL(1);
-        in = -in;
-        IO(2) = in;
-    } else {
-        abortWithError("bit width of operand erronous");
-    }
+    FloatWord in = FLT(1);
+    in = -in;
+    IO(2) = in;
 END_TRIGGER;
 
 END_OPERATION(NEGF)
 
+//////////////////////////////////////////////////////////////////////////////
+// NEGD - arithmetic negation, double-precision floating-point
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(NEGD)
+
+TRIGGER
+    DoubleWord in = DBL(1);
+    in = -in;
+    IO(2) = in;
+END_TRIGGER;
+
+END_OPERATION(NEGD)
+
 //////////////////////////////////////////////////////////////////////////////
 // MUL - integer multiply
 //////////////////////////////////////////////////////////////////////////////
@@ -598,13 +603,7 @@ END_OPERATION(MODU)
 OPERATION(ADDF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = FLT(1) + FLT(2);
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = DBL(1) + DBL(2);
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = FLT(1) + FLT(2);
 END_TRIGGER;
 
 END_OPERATION(ADDF)
@@ -615,13 +614,7 @@ END_OPERATION(ADDF)
 OPERATION(SUBF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = FLT(1) - FLT(2);
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = DBL(1) - DBL(2);
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = FLT(1) - FLT(2);
 END_TRIGGER;
 
 END_OPERATION(SUBF)
@@ -632,13 +625,7 @@ END_OPERATION(SUBF)
 OPERATION(MULF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = FLT(1) * FLT(2);
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = DBL(1) * DBL(2);
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = FLT(1) * FLT(2);
 END_TRIGGER;
 
 END_OPERATION(MULF)
@@ -649,13 +636,7 @@ END_OPERATION(MULF)
 OPERATION(DIVF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = FLT(1) / FLT(2);
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = DBL(1) / DBL(2);
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = FLT(1) / FLT(2);
 END_TRIGGER;
 
 END_OPERATION(DIVF)
@@ -666,13 +647,7 @@ END_OPERATION(DIVF)
 OPERATION(EQF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) == FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) == DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) == FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(EQF)
@@ -684,13 +659,7 @@ END_OPERATION(EQF)
 OPERATION(EQUF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) == FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) == DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) == FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(EQUF)
@@ -702,13 +671,7 @@ END_OPERATION(EQUF)
 OPERATION(NEF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) != FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) != DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) != FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(NEF)
@@ -720,13 +683,7 @@ END_OPERATION(NEF)
 OPERATION(NEUF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) != FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) != DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) != FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(NEUF)
@@ -737,13 +694,7 @@ END_OPERATION(NEUF)
 OPERATION(GTF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) > FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) > DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) > FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(GTF)
@@ -754,13 +705,7 @@ END_OPERATION(GTF)
 OPERATION(GTUF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) > FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) > DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) > FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(GTUF)
@@ -771,13 +716,7 @@ END_OPERATION(GTUF)
 OPERATION(GEF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) >= FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) >= DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) >= FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(GEF)
@@ -788,13 +727,7 @@ END_OPERATION(GEF)
 OPERATION(GEUF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) >= FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) >= DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) >= FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(GEUF)
@@ -805,13 +738,7 @@ END_OPERATION(GEUF)
 OPERATION(LTF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) < FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) < DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) < FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(LTF)
@@ -823,13 +750,7 @@ END_OPERATION(LTF)
 OPERATION(LTUF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) < FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) < DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) < FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(LTUF)
@@ -840,13 +761,7 @@ END_OPERATION(LTUF)
 OPERATION(LEF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) <= FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) <= DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) <= FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(LEF)
@@ -857,13 +772,7 @@ END_OPERATION(LEF)
 OPERATION(LEUF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) <= FLT(2)) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) <= DBL(2)) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) <= FLT(2)) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(LEUF)
@@ -875,13 +784,7 @@ END_OPERATION(LEUF)
 OPERATION(ORDF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2))) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2))) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2))) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(ORDF)
@@ -893,17 +796,217 @@ END_OPERATION(ORDF)
 OPERATION(UORDF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) {
-        IO(3) = (isnan(FLT(1)) || isnan(FLT(2))) ? 1 : 0;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) {
-        IO(3) = (isnan(DBL(1)) || isnan(DBL(2))) ? 1 : 0;
-    } else {
-        abortWithError("bit widths of operands erronous");
-    }
+    IO(3) = (isnan(FLT(1)) || isnan(FLT(2))) ? 1 : 0;
 END_TRIGGER;
 
 END_OPERATION(UORDF)
 
+
+//////////////////////////////////////////////////////////////////////////////
+// ADDD - double-precision floating-point add
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(ADDD)
+
+TRIGGER
+    IO(3) = DBL(1) + DBL(2);
+END_TRIGGER;
+
+END_OPERATION(ADDD)
+
+//////////////////////////////////////////////////////////////////////////////
+// SUBD - double-precision floating-point subtract
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(SUBD)
+
+TRIGGER
+    IO(3) = DBL(1) - DBL(2);
+END_TRIGGER;
+
+END_OPERATION(SUBD)
+
+//////////////////////////////////////////////////////////////////////////////
+// MULD - double-precision floating-point multiply
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(MULD)
+
+TRIGGER
+    IO(3) = DBL(1) * DBL(2);
+END_TRIGGER;
+
+END_OPERATION(MULD)
+
+//////////////////////////////////////////////////////////////////////////////
+// DIVD - double-precision floating-point divide
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(DIVD)
+
+TRIGGER
+    IO(3) = DBL(1) / DBL(2);
+END_TRIGGER;
+
+END_OPERATION(DIVD)
+
+
+//////////////////////////////////////////////////////////////////////////////
+// EQD - double-precision floating-point compare equal (ordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(EQD)
+
+TRIGGER
+    IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) == DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(EQD)
+
+  
+//////////////////////////////////////////////////////////////////////////////
+// EQUD - double-precision floating-point compare equal (unordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(EQUD)
+
+TRIGGER
+    IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) == DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(EQUD)
+
+  
+//////////////////////////////////////////////////////////////////////////////
+// NED - double-precision floating-point compare not equal (ordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(NED)
+
+TRIGGER
+    IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) != DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(NED)
+
+
+//////////////////////////////////////////////////////////////////////////////
+// NEUD - floating-point compare not equal (unordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(NEUD)
+
+TRIGGER
+    IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) != DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(NEUD)
+
+//////////////////////////////////////////////////////////////////////////////
+// GTD - double-precision floating-point compare greater (ordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(GTD)
+
+TRIGGER
+    IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) > DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(GTD)
+
+//////////////////////////////////////////////////////////////////////////////
+// GTUD - double-precision floating-point compare greater (unordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(GTUD)
+
+TRIGGER
+    IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) > DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(GTUD)
+
+//////////////////////////////////////////////////////////////////////////////
+// GED - double-precision floating-point compare greater or equal
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(GED)
+
+TRIGGER
+    IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) >= DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(GED)
+
+//////////////////////////////////////////////////////////////////////////////
+// GEUD - double-precision floating-point compare greater or equal (unordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(GEUD)
+
+TRIGGER
+    IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) >= DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(GEUD)
+
+//////////////////////////////////////////////////////////////////////////////
+// LTD - double-precision floating-point compare lower (ordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(LTD)
+
+TRIGGER
+    IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) < DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(LTD)
+
+  
+//////////////////////////////////////////////////////////////////////////////
+// LTUD - double-precision floating-point compare lower (unordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(LTUD)
+
+TRIGGER
+    IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) < DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(LTUD)
+
+//////////////////////////////////////////////////////////////////////////////
+// LED - double-precision floating-point compare lower or equal
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(LED)
+
+TRIGGER
+    IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) <= DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(LED)
+
+//////////////////////////////////////////////////////////////////////////////
+// LEUD - double-precision floating-point compare lower or equal (unordered)
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(LEUD)
+
+TRIGGER
+    IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) <= DBL(2)) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(LEUD)
+
+
+//////////////////////////////////////////////////////////////////////////////
+// ORDD - double-precision floating-point order check
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(ORDD)
+
+TRIGGER
+    IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2))) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(ORDD)
+
+	 
+//////////////////////////////////////////////////////////////////////////////
+// UORDD - floating-point unorder check
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(UORDD)
+
+TRIGGER
+    IO(3) = (isnan(DBL(1)) || isnan(DBL(2))) ? 1 : 0;
+END_TRIGGER;
+
+END_OPERATION(UORDD)
+
 //////////////////////////////////////////////////////////////////////////////
 // CFI  - convert floating-point to integer
 //////////////////////////////////////////////////////////////////////////////
@@ -956,6 +1059,58 @@ END_TRIGGER;
 
 END_OPERATION(CIFU)
 
+//////////////////////////////////////////////////////////////////////////////
+// CDI  - convert double-precision floating-point to integer
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(CDI)
+
+TRIGGER
+    DoubleWord in = DBL(1);
+    SIntWord out = static_cast<SIntWord>(in);
+    IO(2) = out;
+END_TRIGGER;
+
+END_OPERATION(CDI)
+
+//////////////////////////////////////////////////////////////////////////////
+// CDIU  - convert double-precision floating-point to unsigned integer
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(CDIU)
+
+TRIGGER
+    DoubleWord in = DBL(1);
+    UIntWord out = static_cast<UIntWord>(in);
+    IO(2) = out;
+END_TRIGGER;
+
+END_OPERATION(CDIU)
+
+//////////////////////////////////////////////////////////////////////////////
+// CID - convert integer to floating-point (i2d)
+//////////////////////////////////////////////////////////////////////////////
+// NOTE: can't be tested whether the output really is double...
+OPERATION(CID)
+
+TRIGGER
+    SIntWord in = INT(1);
+    IO(2) = static_cast<DoubleWord>(in);
+END_TRIGGER;
+
+END_OPERATION(CID)
+
+//////////////////////////////////////////////////////////////////////////////
+// CIDU - convert unsigned integer to floating-point (i2d)
+//////////////////////////////////////////////////////////////////////////////
+// NOTE: can't be tested whether the output really is double...
+OPERATION(CIDU)
+
+TRIGGER
+    UIntWord in = UINT(1);
+    IO(2) = static_cast<DoubleWord>(in);
+END_TRIGGER;
+
+END_OPERATION(CIDU)
+
 //////////////////////////////////////////////////////////////////////////////
 // CFD  - convert single precision floating-point to double
 //////////////////////////////////////////////////////////////////////////////
@@ -1060,6 +1215,19 @@ END_TRIGGER;
 
 END_OPERATION(CIHU)
 
+//////////////////////////////////////////////////////////////////////////////
+// INVSQRTD - double-precision floating-point inverse square root
+//////////////////////////////////////////////////////////////////////////////
+OPERATION(INVSQRTD)
+
+TRIGGER
+    DoubleWord in = DBL(1);
+    DoubleWord result(1.0 / sqrt(in));
+    IO(2) = result;
+END_TRIGGER;
+
+END_OPERATION(INVSQRTD)
+
 //////////////////////////////////////////////////////////////////////////////
 // INVSQRTH - half-float inverse square root
 //////////////////////////////////////////////////////////////////////////////
@@ -1133,27 +1301,17 @@ END_TRIGGER;
 END_OPERATION(ABS)
 
 //////////////////////////////////////////////////////////////////////////////
-// ABSF - floating-point absolute value
+// ABSF - single-precision floating-point absolute value
 //////////////////////////////////////////////////////////////////////////////
 
 OPERATION(ABSF)
 
 TRIGGER
-    if (BWIDTH(1) == FLT_WORD_SIZE) {
-        FloatWord in1 = FLT(1);
-        if (in1 < 0) {
-            in1 = -in1;
-        }
-        IO(2) = in1;
-    } else if (BWIDTH(1) == DBL_WORD_SIZE) {
-        DoubleWord in = DBL(1);
-        if (in < 0) {
-            in = -in;
-        }
-        IO(2) = in;
-    } else { 
-        abortWithError("bit width of operand erronous");
+    FloatWord in1 = FLT(1);
+    if (in1 < 0) {
+        in1 = -in1;
     }
+    IO(2) = in1;
 END_TRIGGER;
 
 END_OPERATION(ABSF)
@@ -1208,6 +1366,72 @@ END_TRIGGER;
 
 END_OPERATION(MINF)
 
+//////////////////////////////////////////////////////////////////////////////
+// ABSD - double-precision floating-point absolute value
+//////////////////////////////////////////////////////////////////////////////
+
+OPERATION(ABSD)
+
+TRIGGER
+    DoubleWord in = DBL(1);
+    if (in < 0) {
+        in = -in;
+    }
+    IO(2) = in;
+END_TRIGGER;
+
+END_OPERATION(ABSD)
+
+//////////////////////////////////////////////////////////////////////////////
+// SQRTD - floating-point square root
+//////////////////////////////////////////////////////////////////////////////
+
+OPERATION(SQRTD)
+
+TRIGGER
+    IO(2) = sqrt(DBL(1));
+END_TRIGGER;
+
+END_OPERATION(SQRTD)
+
+//////////////////////////////////////////////////////////////////////////////
+// MAXD - higher of two floating-point values
+//////////////////////////////////////////////////////////////////////////////
+
+OPERATION(MAXD)
+
+TRIGGER
+    DoubleWord in1 = DBL(1);
+    DoubleWord in2 = DBL(2);
+    if( in2 > in1 ) {
+      IO(3) = in2;
+    }
+    else {
+      IO(3) = in1;
+    }
+END_TRIGGER;
+
+END_OPERATION(MAXD)
+
+//////////////////////////////////////////////////////////////////////////////
+// MIND - lower of two floating-point values
+//////////////////////////////////////////////////////////////////////////////
+
+OPERATION(MIND)
+
+TRIGGER
+    DoubleWord in1 = DBL(1);
+    DoubleWord in2 = DBL(2);
+    if( in2 < in1 ) {
+      IO(3) = in2;
+    }
+    else {
+      IO(3) = in1;
+    }
+END_TRIGGER;
+
+END_OPERATION(MIND)
+
 //////////////////////////////////////////////////////////////////////////////
 // LDQU - load 1 mimimum addressable unit from memory (unsigned)
 //////////////////////////////////////////////////////////////////////////////
diff --git a/tce/opset/base/base.opp b/tce/opset/base/base.opp
index cf641c5b74..e59f630d5c 100644
--- a/tce/opset/base/base.opp
+++ b/tce/opset/base/base.opp
@@ -672,6 +672,23 @@ Output returns '1' if input 1 is greater than input 2 and otherwise returns '0'.
     </trigger-semantics>
   </operation>
 
+  <operation>
+    <name>NEGD</name>
+    <description/>
+    <inputs>1</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(negd, IO(1), IO(2));
+    </trigger-semantics>
+    <trigger-semantics>
+      SimValue tmp;
+      EXEC_OPERATION(subd, IO(1), IO(1), tmp);
+      EXEC_OPERATION(subd, tmp, IO(1), IO(2));
+    </trigger-semantics>
+  </operation>
+
   <operation>
     <name>NEGF</name>
     <description>Arithmetic negation, floating-point.</description>
@@ -712,6 +729,26 @@ Output returns '1' if input 1 is greater than input 2 and otherwise returns '0'.
     </trigger-semantics>
   </operation>
 
+  <operation>
+    <name>MAXD</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="3" type="DoubleWord"/>
+  </operation>
+
+  <operation>
+    <name>MIND</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="3" type="DoubleWord"/>
+  </operation>
+
   <operation>
     <name>MAXF</name>
     <description>Returns the larger of the two floating-point input values.</description>
@@ -813,6 +850,300 @@ Output returns '1' if input 1 is greater than input 2 and otherwise returns '0'.
     </trigger-semantics>
   </operation>
 
+  <operation>
+    <name>ADDD</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="1"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="64" id="3" type="DoubleWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(addd, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>SUBD</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="3" type="DoubleWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(subd, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>MULD</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="1"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="64" id="3" type="DoubleWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(muld, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>DIVD</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="3" type="DoubleWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(divd, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>EQD</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="1"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(eqd, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>NED</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="1"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(ned, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+    <trigger-semantics>
+      SimValue t1,t2;
+      EXEC_OPERATION(gtd, IO(1), IO(2), t1);
+      EXEC_OPERATION(gtd, IO(2), IO(1), t2);
+      EXEC_OPERATION(ior, t1, t2, IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>GTD</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(gtd, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>GED</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(ged, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+    <trigger-semantics>
+      SimValue t1,t2;
+      EXEC_OPERATION(gtd, IO(1), IO(2), t1);
+      EXEC_OPERATION(eqd, IO(1), IO(2), t2);
+      EXEC_OPERATION(ior, t1, t2, IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>LTD</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(ltd, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+    <trigger-semantics>
+      EXEC_OPERATION(ged, IO(2), IO(1), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>LED</name>
+    <description/>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(led, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+    <trigger-semantics>
+      SimValue t1,t2;
+      EXEC_OPERATION(eqd, IO(1), IO(2), t1);
+      EXEC_OPERATION(gtd, IO(2), IO(1), t2);
+      EXEC_OPERATION(ior, t1, t2, IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>EQUD</name>
+    <description></description>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="1"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(eqd, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>NEUD</name>
+    <description></description>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="1"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      SimValue temp1;
+      EXEC_OPERATION(eqd, IO(1), IO(2), temp1);
+      EXEC_OPERATION(xor, temp1, 1, IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>GTUD</name>
+    <description></description>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(gtd, IO(1), IO(2), IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>GEUD</name>
+    <description></description>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      SimValue temp1, temp2;
+      EXEC_OPERATION(gtd, IO(1), IO(2), temp1);
+      EXEC_OPERATION(eqd, IO(1), IO(2), temp2);
+      EXEC_OPERATION(ior, temp1, temp2, IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>LTUD</name>
+    <description></description>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      SimValue temp1, temp2, temp3;
+      EXEC_OPERATION(gtd, IO(1), IO(2), temp1);
+      EXEC_OPERATION(eqd, IO(1), IO(2), temp2);
+      EXEC_OPERATION(ior, temp1, temp2, temp3);
+      EXEC_OPERATION(xor, temp3, 1, IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>LEUD</name>
+    <description></description>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      SimValue temp1;
+      EXEC_OPERATION(gtd, IO(1), IO(2), temp1);
+      EXEC_OPERATION(xor,temp1,1, IO(3));
+    </trigger-semantics>
+    <trigger-semantics>
+      SimValue t1,t2;
+      EXEC_OPERATION(eqd, IO(1), IO(2), t1);
+      EXEC_OPERATION(gtd, IO(2), IO(1), t2);
+      EXEC_OPERATION(ior, t1, t2, IO(3));
+    </trigger-semantics>
+  </operation>
+
   <operation>
     <name>ADDF</name>
     <description>Floating-point addition. Output 3 is sum of inputs 1 and 2.</description>
@@ -973,7 +1304,7 @@ Output returns '1' if input 1 is greater than input 2 and otherwise returns '0'.
       EXEC_OPERATION(ltf, IO(1), IO(2), IO(3));
     </trigger-semantics>
     <trigger-semantics>
-      EXEC_OPERATION(gtf, IO(2), IO(1), IO(3));
+      EXEC_OPERATION(gef, IO(2), IO(1), IO(3));
     </trigger-semantics>
   </operation>
 
@@ -1319,6 +1650,54 @@ EXEC_OPERATION(xor,temp1,1, IO(3));
     </trigger-semantics>
   </operation>
 
+  <operation>
+    <name>MACD</name>
+    <description/>
+    <inputs>3</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="3"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="3" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="64" id="4" type="DoubleWord"/>
+    <trigger-semantics>
+      SimValue t1;
+      EXEC_OPERATION(muld, IO(2), IO(3), t1);
+      EXEC_OPERATION(addd, IO(1), t1, IO(4));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>MSUD</name>
+    <description/>
+    <inputs>3</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="3"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="3" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="64" id="4" type="DoubleWord"/>
+    <trigger-semantics>
+      SimValue t1;
+      EXEC_OPERATION(muld, IO(2), IO(3), t1);
+      EXEC_OPERATION(subd, IO(1), t1, IO(4));
+    </trigger-semantics>
+  </operation>
+
   <operation>
     <name>MAC</name>
     <description>Multiply and accumulate (signed integer).</description>
@@ -1714,6 +2093,18 @@ EXEC_OPERATION(xor,temp1,1, IO(3));
     <out element-count="1" element-width="32" id="2" type="SIntWord"/>
   </operation>
 
+  <operation>
+    <name>ABSD</name>
+    <description/>
+    <inputs>1</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(absd, IO(1), IO(2));
+    </trigger-semantics>
+  </operation>
+
   <operation>
     <name>ABSF</name>
     <description>Floating-point absolute value.</description>
@@ -1973,6 +2364,51 @@ The simulation behavior definition assumes 100MHz clock frequency, which can be
     <out element-count="1" element-width="32" id="2" type="UIntWord"/>
   </operation>
 
+  <operation>
+    <name>ORDD</name>
+    <description>Returns 1 if floating point input operands are ordered. i.e. neither of the input operands is NaN.
+
+</description>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord">
+      <can-swap>
+        <in id="2"/>
+      </can-swap>
+    </in>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord">
+      <can-swap>
+        <in id="1"/>
+      </can-swap>
+    </in>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      SimValue temp1, temp2;
+      EXEC_OPERATION(eqd, IO(1), IO(1), temp1);
+      EXEC_OPERATION(eqd, IO(2), IO(2), temp2);
+      EXEC_OPERATION(and, temp1, temp2, IO(3));
+    </trigger-semantics>
+  </operation>
+
+  <operation>
+    <name>UORDD</name>
+    <description>Returns 1 if the two floating point operands are unordered. i.e. isNaN(io1) || isNan(io2).
+
+</description>
+    <inputs>2</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <in element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <out element-count="1" element-width="32" id="3" type="UIntWord"/>
+    <trigger-semantics>
+      SimValue temp1, temp2, temp3;
+      EXEC_OPERATION(eqd, IO(1), IO(1), temp1);
+      EXEC_OPERATION(eqd, IO(2), IO(2), temp2);
+      EXEC_OPERATION(and, temp1, temp2, temp3);
+      EXEC_OPERATION(xor, temp3, 1, IO(3));
+    </trigger-semantics>
+  </operation>
+
   <operation>
     <name>ORDF</name>
     <description>Returns 1 if floating point input operands are ordered. i.e. neither of the input operands is NaN.
@@ -2022,6 +2458,15 @@ EXEC_OPERATION(xor, temp3, 1, IO(3));
 </trigger-semantics>
   </operation>
 
+  <operation>
+    <name>SQRTD</name>
+    <description>Performs a square root of a double precision floating point.</description>
+    <inputs>1</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="2" type="DoubleWord"/>
+  </operation>
+
   <operation>
     <name>SQRTF</name>
     <description>Performs a square root of a single precision floating point. For example, the MIPS R4000 FPU implements it.
@@ -2048,6 +2493,15 @@ EXEC_OPERATION(xor, temp3, 1, IO(3));
     </trigger-semantics>
   </operation>
 
+  <operation>
+    <name>INVSQRTD</name>
+    <description>Performs a inverse square root of a double-precision floating point.</description>
+    <inputs>1</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="2" type="DoubleWord"/>
+  </operation>
+
   <operation>
     <name>INVSQRTH</name>
     <description>Performs a inverse square root of a half-precision floating point.
@@ -2058,6 +2512,18 @@ EXEC_OPERATION(xor, temp3, 1, IO(3));
     <out element-count="1" element-width="16" id="2" type="HalfFloatWord"/>
   </operation>
 
+  <operation>
+    <name>SQUARED</name>
+    <description>Computes the square of a double-precision floating point.</description>
+    <inputs>1</inputs>
+    <outputs>1</outputs>
+    <in element-count="1" element-width="64" id="1" type="DoubleWord"/>
+    <out element-count="1" element-width="64" id="2" type="DoubleWord"/>
+    <trigger-semantics>
+      EXEC_OPERATION(muld, IO(1), IO(1), IO(2));
+    </trigger-semantics>
+  </operation>
+
   <operation>
     <name>SQUAREH</name>
     <description>Computes the square of a half-precision floating point.
diff --git a/tce/src/applibs/LLVMBackend/TCETargetMachine.cc b/tce/src/applibs/LLVMBackend/TCETargetMachine.cc
index 174c9209a6..7db5f24544 100644
--- a/tce/src/applibs/LLVMBackend/TCETargetMachine.cc
+++ b/tce/src/applibs/LLVMBackend/TCETargetMachine.cc
@@ -203,7 +203,7 @@ TCETargetMachine::setTargetMachinePlugin(TCETargetMachinePlugin& plugin) {
     dataLayoutStr += "-i64:32:32";
     dataLayoutStr += "-f16:16:16";
     dataLayoutStr += "-f32:32:32";
-    dataLayoutStr += "-f64:32:64";
+    dataLayoutStr += "-f64:64:64";
     dataLayoutStr += "-v64:32:64";
     dataLayoutStr += "-v128:32:128";
     dataLayoutStr += "-v256:32:256";
diff --git a/tce/src/applibs/LLVMBackend/TDGen.cc b/tce/src/applibs/LLVMBackend/TDGen.cc
index 4e16b72c08..94860deeda 100644
--- a/tce/src/applibs/LLVMBackend/TDGen.cc
+++ b/tce/src/applibs/LLVMBackend/TDGen.cc
@@ -231,6 +231,7 @@ TDGen::writeRegisterInfo(std::ostream& o)
     write1bitRegisterInfo(o);
     //write16bitRegisterInfo(o);
     write32bitRegisterInfo(o);
+    write64bitRegisterInfo(o);
     writeVectorRegisterInfo(o);
 
     return true;
@@ -248,6 +249,13 @@ TDGen::writeRegisterClasses(std::ostream& o) {
           << "}" << std::endl;
     }
     
+    o << "class R16<string n, list<Register> aliases> : TCEReg<n, aliases> {"
+      << "}" << std::endl;
+    if (hasExIntRegs_) {
+        o << "class R16_Ex<string n, list<Register> aliases> : R16<n, aliases> {}"
+          << std::endl;
+    }
+
     o << "class R32<string n, list<Register> aliases> : TCEReg<n, aliases> {"
       << "}" << std::endl;
     if (hasExIntRegs_) {
@@ -255,13 +263,13 @@ TDGen::writeRegisterClasses(std::ostream& o) {
           << std::endl;
     }
     
-    o << "class R16<string n, list<Register> aliases> : TCEReg<n, aliases> {"
+    o << "class R64<string n, list<Register> aliases> : TCEReg<n, aliases> {"
       << "}" << std::endl;
     if (hasExIntRegs_) {
-        o << "class R16_Ex<string n, list<Register> aliases> : R16<n, aliases> {}"
+        o << "class R64_Ex<string n, list<Register> aliases> : R64<n, aliases> {}"
           << std::endl;
     }
-
+    
     for (int i = 0; i <= highestLaneInt_; i++) {
         o << "class R32_L_" << i << "<string n, list<Register> aliases> : R32<n, aliases>{}" << std::endl;
     }
@@ -604,7 +612,6 @@ TDGen::write32bitRegisterInfo(std::ostream& o) {
             o << ")>;" << std::endl;
         }
     }
-    
 
     o << std::endl;
 }
@@ -688,22 +695,21 @@ TDGen::write64bitRegisterInfo(std::ostream& o) {
 
     if (regs64bit_.size() < 1) {
         RegInfo reg = { "dummy64", 0 };
-        writeRegisterDef(o, reg, "DIRES0", "Ri64", "", RESERVED);
+        writeRegisterDef(o, reg, "DIRES0", "R64", "", RESERVED);
         i64regs = "DIRES0";
     } else {
-  
-      writeRegisterDef(o, regs64bit_[0], "DIRES0", "Ri64", "", RESERVED);
+        writeRegisterDef(o, regs64bit_[0], "DIRES0", "R64", "", RESERVED);
         for (unsigned i = 1; i < regs64bit_.size(); i++) {
             std::string regName = "DI" + Conversion::toString(i);
             i64regs += regName;
             i64regs += ", ";
-            writeRegisterDef(o, regs64bit_[i], regName, "Ri64", "", GPR);
+            writeRegisterDef(o, regs64bit_[i], regName, "R64", "", GPR);
         }
         i64regs += "DIRES0";
     }
 
     o << std::endl
-      << "def I64Regs : RegisterClass<\"TCE\", [i64], 32, (add " // DIRES
+      << "def R64Regs : RegisterClass<\"TCE\", [i64,f64], 64, (add " // DIRES
       << i64regs << ")> ;"
       << std::endl;
 
@@ -711,23 +717,21 @@ TDGen::write64bitRegisterInfo(std::ostream& o) {
 
     if (regs64bit_.size() < 1) {
         RegInfo reg = { "dummy64", 0 };
-        writeRegisterDef(o, reg, "DRES0", "Rf64", "", RESERVED);
+        writeRegisterDef(o, reg, "DRES0", "R64", "", RESERVED);
         f64regs = "DRES0";
     } else {
-        writeRegisterDef(
-            o, regs64bit_[0], "DRES0", "Ri64", "DIRES0", RESERVED);
+        writeRegisterDef(o, regs64bit_[0], "DRES0", "R64", "DIRES0", RESERVED);
         for (unsigned i = 1; i < regs64bit_.size(); i++) {
             std::string regName = "D" + Conversion::toString(i);
             std::string aliasName = "DI" + Conversion::toString(i);
             f64regs += regName;
             f64regs += ", ";
-            writeRegisterDef(
-                o, regs64bit_[i], regName, "Rf64", aliasName, GPR);
+            writeRegisterDef(o, regs64bit_[i], regName, "R64", aliasName, GPR);
         }
         f64regs += "DRES0";
     }
     o << std::endl
-      << "def F64Regs : RegisterClass<\"TCE\", [f64], 32, (add "
+      << "def R64FPRegs : RegisterClass<\"TCE\", [f64], 64, (add "
       << f64regs << ")>;" << std::endl;
 }
 
@@ -887,6 +891,7 @@ TDGen::writeVectorRegisterInfo(
               << "HFPRegs : RegisterClass<\"TCE\", [v" << vectorWidth
               << "f16], " << 32 * vectorWidth  << ", (add V" << vectorWidth << "R32DUMMY)> ;"
               << std::endl << std::endl;*/
+            //RWH: need something here for f64?
         } else {
             o << "def " << regClassBase << "_L_" << i
               << "Regs : RegisterClass<\"TCE\", [v" << vectorWidth
@@ -1045,6 +1050,11 @@ TDGen::writeInstrInfo(std::ostream& os) {
         }
     }
 
+    opNames_["LDDdr"] = "LDD";
+    opNames_["LDDdi"] = "LDD";
+    opNames_["STDdr"] = "STD";
+    opNames_["STDdi"] = "STD";
+
     opNames_["LDWfr"] = "LDW";
     opNames_["LDWfi"] = "LDW";
     opNames_["STWfr"] = "STW";
@@ -1379,6 +1389,7 @@ TDGen::writeBackendCode(std::ostream& o) {
             if (opName == "sxhw") hasSXHW = true;
             if (opName == "sxqw") hasSXQW = true;
 	    if (opName == "sqrtf") hasSQRTF = true;
+            //RWH: add more cases here for double-precision ops?
         }
     }
 
@@ -1433,15 +1444,15 @@ TDGen::writeTopLevelTD(std::ostream& o) {
  * a
  * b = Boolean/predicate register
  * c
- * d
- * e
+ * d = Float64 register
+ * e = immediate float64
  * f = Float32 register
  * g
  * h = Float16 register
  * i = Immediate integer
  * j = immediate boolean
- * k = immediate float?
- * l = immediate float16?
+ * k = immediate float32
+ * l = immediate float16
  * m = float2 vec?
  * n = float4 vec?
  * o = float8 vec?
@@ -1501,6 +1512,10 @@ TDGen::writeOperationDefs(
 
         return;
     }
+    if (op.name() == "CDI" || op.name() == "CDIU") {
+        writeOperationDef(o, op, "rd", attrs, skipPattern);
+        return;
+    }
         
     // rotations are allways n x n -> n bits.
     if (op.name() == "ROTL" || op.name() == "ROTR" ||
@@ -1537,9 +1552,18 @@ TDGen::writeOperationDefs(
         writeOperationDef(o, op, "bjjb", attrs, skipPattern);
         writeOperationDef(o, op, "bjbb", attrs, skipPattern);
         writeOperationDef(o, op, "bbjb", attrs, skipPattern);
-        // TODO: what about floating-point values?
+        writeOperationDef(o, op, "dddb", attrs, skipPattern);
+        writeOperationDef(o, op, "deeb", attrs, skipPattern);
+        writeOperationDef(o, op, "dedb", attrs, skipPattern);
+        writeOperationDef(o, op, "ddeb", attrs, skipPattern);
         writeOperationDef(o, op, "fffb", attrs, skipPattern);
+        writeOperationDef(o, op, "fkkb", attrs, skipPattern);
+        writeOperationDef(o, op, "fkfb", attrs, skipPattern);
+        writeOperationDef(o, op, "ffkb", attrs, skipPattern);
         writeOperationDef(o, op, "hhhb", attrs, skipPattern);
+        writeOperationDef(o, op, "hllb", attrs, skipPattern);
+        writeOperationDef(o, op, "hlhb", attrs, skipPattern);
+        writeOperationDef(o, op, "hhlb", attrs, skipPattern);
 
         hasSelect_ = true;
         return;
@@ -1549,7 +1573,6 @@ TDGen::writeOperationDefs(
     if (op.numberOfInputs() == 2 && op.numberOfOutputs() == 0) {
         Operand& operand1 = op.operand(1);
         Operand& operand2 = op.operand(2);
-        // TODO: add an else branch here for float immediates
         if ((operand1.type() == Operand::UINT_WORD || 
              operand1.type() == Operand::SINT_WORD ||
              operand1.type() == Operand::RAW_DATA) &&
@@ -1558,6 +1581,15 @@ TDGen::writeOperationDefs(
             operand2.type() == Operand::RAW_DATA)) {
 
             writeOperationDef(o, op, "ii", attrs, skipPattern);
+        } else if (operand1.type() == Operand::DOUBLE_WORD &&
+                   operand2.type() == Operand::DOUBLE_WORD) {
+            writeOperationDef(o, op, "ee", attrs, skipPattern);
+        } else if (operand1.type() == Operand::FLOAT_WORD &&
+                   operand2.type() == Operand::FLOAT_WORD) {
+            writeOperationDef(o, op, "kk", attrs, skipPattern);
+        } else if (operand1.type() == Operand::HALF_FLOAT_WORD &&
+                   operand2.type() == Operand::HALF_FLOAT_WORD) {
+            writeOperationDef(o, op, "ll", attrs, skipPattern);
         }
     }
 
@@ -1565,9 +1597,6 @@ TDGen::writeOperationDefs(
     // this the ordinary def
 
     // then try with immediates.
-    // TODO: this should be 2^n loop instead of n loop, to get
-    // all permutations.
-
     writeOperationDefs(o, op, operandTypes, attrs, skipPattern);
 
     // then with boolean outs, and vector versions.
@@ -1584,8 +1613,11 @@ TDGen::writeOperationDefs(
         // create vector versions.
 
 	// TODO: no half vectors yet, pending f16v4..f16v16 types in llvm.
+	// Also double vectors not implemented yet.
 	if (outOperand.type() != Operand::HALF_FLOAT_WORD && 
-            op.operand(1).type() != Operand::HALF_FLOAT_WORD) { 
+            outOperand.type() != Operand::DOUBLE_WORD &&
+            op.operand(1).type() != Operand::HALF_FLOAT_WORD &&
+            op.operand(1).type() != Operand::DOUBLE_WORD) { 
             for (int i = 0, w = 2; i < 3; i++, w<<=1) {
                 char floatChar = 'm' + i;
                 char intChar = 'v' + i;
@@ -1620,6 +1652,8 @@ TDGen::writeOperationDefs(
     // first without imms.
     writeOperationDef(o, op, operandTypes, attrs, skipPattern, backendPrefix);
 
+    // TODO: this should be 2^n loop instead of n loop, to get
+    // all permutations.
     for (int i = 0; i < op.numberOfInputs(); i++) {
         bool canSwap = false;
         for (int j = i+1 ; j < op.numberOfInputs(); j++) {
@@ -1642,6 +1676,9 @@ TDGen::writeOperationDefs(
             case 'b':
                 c = 'j';
                 break;
+            case 'd':
+                c = 'e';
+                break;
             case 'f':
                 c = 'k';
                 break;
@@ -1907,7 +1944,7 @@ TDGen::writeOperationDef(
 /**
  * Checks whether operand is integer or float type.
  *
- * @return 'r' for integer, 'f' for float
+ * @return letter from operand type table, e.g. 'r' for integer, 'f' for float
  */
 char 
 TDGen::operandChar(Operand& operand) {
@@ -1915,9 +1952,11 @@ TDGen::operandChar(Operand& operand) {
         return 'b';
     } else if (operand.type() == Operand::HALF_FLOAT_WORD) {
         return 'h';
+    } else if (operand.type() == Operand::DOUBLE_WORD) {
+        return 'd';
     } else if (operand.type() != Operand::UINT_WORD &&
-        operand.type() != Operand::SINT_WORD &&
-        operand.type() != Operand::RAW_DATA) {
+               operand.type() != Operand::SINT_WORD &&
+               operand.type() != Operand::RAW_DATA) {
         return 'f';
     } else {
         return 'r';
@@ -1988,7 +2027,7 @@ TDGen::writeEmulationPattern(
             char inputType = operandChar(op.operand(i+1));
             if (immInput == i+1) {
                 // float imm operands not allowed
-                if (inputType == 'f' || inputType == 'h') {
+                if (inputType == 'f' || inputType == 'h' || inputType == 'd') {
                     ok = false;
                     break;
                 } else {
@@ -2011,12 +2050,19 @@ TDGen::writeEmulationPattern(
                 op.name() == "LEF" || op.name() == "LEUF" ||
                 op.name() == "GTF" || op.name() == "GTUF" ||
                 op.name() == "NEF" || op.name() == "NEUF" ||
+                op.name() == "LTD" || op.name() == "LTUD" ||
+                op.name() == "EQD" || op.name() == "EQUD" ||
+                op.name() == "GED" || op.name() == "GEUD" || 
+                op.name() == "LED" || op.name() == "LEUD" ||
+                op.name() == "GTD" || op.name() == "GTUD" ||
+                op.name() == "NED" || op.name() == "NEUD" ||
                 op.name() == "EQ" || op.name() == "NE" ||
                 op.name() == "GE" ||op.name() == "GEU" ||
                 op.name() == "GT" || op.name() == "GTU" ||
                 op.name() == "LE" || op.name() == "LEU" ||
                 op.name() == "LT" || op.name() == "LTU" ||
-                op.name() == "ORDF" || op.name() == "UORDF") {
+                op.name() == "ORDF" || op.name() == "UORDF" ||
+                op.name() == "ORDD" || op.name() == "UORDD") {
                 std::string boolOperandTypes = operandTypes;
                 boolOperandTypes[0] = 'b';
                 o << "def : Pat<(" << match1.str() << "), "
@@ -2095,6 +2141,40 @@ TDGen::llvmOperationPattern(const Operation& op, char operandType) {
     if (opName == "negf") return "fneg %1%";
     if (opName == "sqrtf") return "fsqrt %1%";
 
+    if (opName == "eqd") return "setoeq %1%, %2%";
+    if (opName == "ned") return "setone %1%, %2%";
+    if (opName == "ltd") return "setolt %1%, %2%";
+    if (opName == "led") return "setole %1%, %2%";
+    if (opName == "gtd") return "setogt %1%, %2%";
+    if (opName == "ged") return "setoge %1%, %2%";
+
+    if (opName == "equd") return "setueq %1%, %2%";
+    if (opName == "neud") return "setune %1%, %2%";
+    if (opName == "ltud") return "setult %1%, %2%";
+    if (opName == "leud") return "setule %1%, %2%";
+    if (opName == "gtud") return "setugt %1%, %2%";
+    if (opName == "geud") return "setuge %1%, %2%";
+
+    if (opName == "ordd") return "seto %1%, %2%";
+    if (opName == "uordd") return "setuo %1%, %2%";
+
+    if (opName == "addd") return "fadd %1%, %2%";
+    if (opName == "subd") return "fsub %1%, %2%";
+    if (opName == "muld") return "fmul %1%, %2%";
+    if (opName == "divd") return "fdiv %1%, %2%";
+    if (opName == "absd") return "fabs %1%";
+    if (opName == "negd") return "fneg %1%";
+    if (opName == "sqrtd") return "fsqrt %1%";
+
+    //RWH: check
+    if (opName == "cid") return "sint_to_fp %1%";
+    if (opName == "cdi") return "fp_to_sint %1%";
+    if (opName == "cidu") return "uint_to_fp %1%";
+    if (opName == "cdiu") return "fp_to_uint %1%";
+
+    if (opName == "cdf") return "fround %1%";//fptrunc %1% to float
+    if (opName == "cfd") return "fextend %1%";//fpext %1% to double
+
     if (opName == "cif") return "sint_to_fp %1%";
     if (opName == "cfi") return "fp_to_sint %1%";
     if (opName == "cifu") return "uint_to_fp %1%";
@@ -2147,12 +2227,12 @@ TDGen::llvmOperationPattern(const Operation& op, char operandType) {
     if (opName == "ldh") return "sextloadi16 %1%";
     if (opName == "ldhu") return "zextloadi16 %1%";
     if (opName == "ldw") return "load %1%";
-    //if (opName == "ldd") return "load";
+    if (opName == "ldd") return "load %1%";//rwh: check
 
     if (opName == "stq") return "truncstorei8 %2%, %1%";
     if (opName == "sth") return "truncstorei16 %2%, %1%";
     if (opName == "stw") return "store %2%, %1%";
-    //if (opName == "std") return "load";
+    if (opName == "std") return "store %2%, %1%";//rwh: check
 
     if (opName == "sxhw") {
         switch (operandType) {
@@ -2256,6 +2336,40 @@ TDGen::llvmOperationName(const Operation& op) {
     if (opName == "negf") return "fneg";
     if (opName == "sqrtf") return "fsqrt";
 
+    if (opName == "eqd") return "setoeq";
+    if (opName == "ned") return "setone";
+    if (opName == "ltd") return "setolt";
+    if (opName == "led") return "setole";
+    if (opName == "gtd") return "setogt";
+    if (opName == "ged") return "setoge";
+
+    if (opName == "equd") return "setueq";
+    if (opName == "neud") return "setune";
+    if (opName == "ltud") return "setult";
+    if (opName == "leud") return "setule";
+    if (opName == "gtud") return "setugt";
+    if (opName == "geud") return "setuge";
+
+    if (opName == "ordd") return "seto";
+    if (opName == "uordd") return "setuo";
+
+    if (opName == "addd") return "fadd";
+    if (opName == "subd") return "fsub";
+    if (opName == "muld") return "fmul";
+    if (opName == "divd") return "fdiv";
+    if (opName == "absd") return "fabs";
+    if (opName == "negd") return "fneg";
+    if (opName == "sqrtd") return "fsqrt";
+
+    //RWH: check
+    if (opName == "cid") return "sint_to_fp";
+    if (opName == "cdi") return "fp_to_sint";
+    if (opName == "cidu") return "uint_to_fp";
+    if (opName == "cdiu") return "fp_to_uint";
+
+    if (opName == "cdf") return "fround";//fptrunc to float
+    if (opName == "cfd") return "fextend";//fpext to double
+
     if (opName == "cif") return "sint_to_fp";
     if (opName == "cfi") return "fp_to_sint";
     if (opName == "cifu") return "uint_to_fp";
@@ -2269,17 +2383,51 @@ TDGen::llvmOperationName(const Operation& op) {
     if (opName == "chf") return "fpextend";
 #endif
 
+    if (opName == "cih") return "sint_to_fp";
+    if (opName == "chi") return "fp_to_sint";
+    if (opName == "cihu") return "uint_to_fp";
+    if (opName == "chiu") return "fp_to_uint";
+
+    if (opName == "neuh") return "setune";
+    if (opName == "eqh") return "setoeq";
+    if (opName == "neh") return "setone";
+    if (opName == "lth") return "setolt";
+    if (opName == "leh") return "setole";
+    if (opName == "gth") return "setogt";
+    if (opName == "geh") return "setoge";
+
+    if (opName == "ordh") return "seto";
+    if (opName == "uordh") return "setuo";
+
+    if (opName == "addh") return "fadd";
+    if (opName == "subh") return "fsub";
+    if (opName == "mulh") return "fmul";
+    if (opName == "divh") return "fdiv";
+    if (opName == "absh") return "fabs";
+    if (opName == "negh") return "fneg";
+    if (opName == "sqrth") return "fsqrt";
+
+    if (opName == "cih") return "sint_to_fp";
+    if (opName == "chi") return "fp_to_sint";
+    if (opName == "cihu") return "uint_to_fp";
+    if (opName == "chiu") return "fp_to_uint";
+
+    if (opName == "csh") return "sint_to_fp";
+    if (opName == "cshu") return "uint_to_fp";
+    if (opName == "chs") return "fp_to_sint";
+    if (opName == "chsu") return "fp_to_uint";
+
     if (opName == "ldq") return "sextloadi8";
     if (opName == "ldqu") return "zextloadi8";
     if (opName == "ldh") return "sextloadi16";
     if (opName == "ldhu") return "zextloadi16";
     if (opName == "ldw") return "load";
-    //if (opName == "ldd") return "load";
+    if (opName == "ldd") return "load";//RWH: check
 
     if (opName == "stq") return "truncstorei8";
     if (opName == "sth") return "truncstorei16";
     if (opName == "stw") return "store";
-    //if (opName == "std") return "load";
+    if (opName == "std") return "store";
 
     if (opName == "sxhw") return "sext_inreg";
     if (opName == "sxqw") return "sext_inreg";
@@ -2324,6 +2472,8 @@ TDGen::operationCanBeMatched(
     if (llvmOperationPattern(op,'r') != "") {
         return true;
     }
+    //RWH: should probably lookup operand type instead of assuming 'r' in call
+    //to llvmOperationPattern
 
     std::set<std::string> useSet;
     if (recursionCycleCheck != NULL) {
@@ -2601,11 +2751,15 @@ TDGen::constantNodeString(
             case 'b':
                 return "(i1 " + Conversion::toString(node.value()) + ")";
             case 'h':
+            case 'l':
                 return "(f16 " + Conversion::toString(node.value()) + ")";
                 // TODO: f16 vectors not yet implemented
             case 'f':
             case 'k':
                 return "(f32 " + Conversion::toString(node.value()) + ")";
+            case 'd':
+            case 'e':
+                return "(f64 " + Conversion::toString(node.value()) + ")";
             case 'm':
                 return ("(v2f32 (build_vector (f32 " + 
                         Conversion::toString(node.value()) +
@@ -2762,12 +2916,17 @@ TDGen::emulatingOpNodeLLVMName(
                 } else {
                     if (dynamic_cast<ConstantNode*>(
                             &(dag.tailNode(edge)))) {
-                        if (operand.type() == Operand::SINT_WORD ||
-                            operand.type() == Operand::UINT_WORD ||
-                            operand.type() == Operand::RAW_DATA) {
-                            operationName += 'i';
-                        } else {
+                        if (operand.type() == Operand::BOOL) {
+                            operationName += 'j';
+                        } else if (operand.type() == Operand::HALF_FLOAT_WORD) {
+                            operationName += 'l';
+                        } else if (operand.type() == Operand::FLOAT_WORD) {
                             operationName += 'k';
+                        } else if (operand.type() == Operand::DOUBLE_WORD) {
+                            operationName += 'e';
+                        } else {
+                            // should be a 32b integer register (signed, unsigned, or raw)
+                            operationName += 'i';
                         }
                     } else {
                         TerminalNode* t = 
@@ -3009,7 +3168,7 @@ TDGen::operandToString(
         }
     } else if (operand.type() == Operand::DOUBLE_WORD) {
         // TODO: immediate check??
-        return "F64Regs:$op" + Conversion::toString(idx);
+        return "R64FPRegs:$op" + Conversion::toString(idx);
     } else {
         assert(false && "Unknown operand type.");
     }
@@ -3117,9 +3276,7 @@ TDGen::canBeImmediate(
 
 void
 TDGen::generateLoadStoreCopyGenerator(std::ostream& os) {
-    // vector store/load generation code
-
-    TCEString prefix = "&"; // address of -operator
+    TCEString prefix = "&"; // address-of operator
     TCEString rcpf = "RegsRegClass";
     TCEString rapf = "TCE::RARegRegClass";
 
@@ -3150,6 +3307,10 @@ TDGen::generateLoadStoreCopyGenerator(std::ostream& os) {
             os << "\tif (rc == " << prefix << "TCE::"  << ri->first
                << "HFP" << rcpf << ") return TCE::STHhr;" << std::endl;
         }
+        if (ri->first.find("R64") == 0) {
+            os << "\tif (rc == " << prefix << "TCE::"  << ri->first
+               << "FP" << rcpf << ") return TCE::STDdr;" << std::endl;
+        }
     }
     
     if (opNames_.find("STW2vr") != opNames_.end()) {
@@ -3244,6 +3405,10 @@ TDGen::generateLoadStoreCopyGenerator(std::ostream& os) {
             os << "\tif (rc == " << prefix << "TCE::" << ri->first
                << "HFP" << rcpf << ") return TCE::LDHhr;" << std::endl;
         }
+        if (ri->first.find("R64") == 0) {
+            os << "\tif (rc == " << prefix << "TCE::" << ri->first
+               << "FP" << rcpf << ") return TCE::LDDdr;" << std::endl;
+        }
     }
 
     if (opNames_.find("LDW2vr") != opNames_.end()) {
@@ -3323,6 +3488,9 @@ TDGen::createMinMaxGenerator(std::ostream& os) {
     if (opNames_.find("MINFfff") != opNames_.end()) {
         os << "if (vt == MVT::f32) return TCE::MINFfff;" << std::endl;
     }
+    if (opNames_.find("MINDddd") != opNames_.end()) {
+        os << "if (vt == MVT::f64) return TCE::MINDddd;" << std::endl;
+    }
     os << "\treturn -1; " << std::endl << "}" << std::endl;
 
     // MAX
@@ -3334,22 +3502,41 @@ TDGen::createMinMaxGenerator(std::ostream& os) {
     if (opNames_.find("MAXFfff") != opNames_.end()) {
         os << "if (vt == MVT::f32) return TCE::MAXFfff;" << std::endl;
     }
+    if (opNames_.find("MAXDddd") != opNames_.end()) {
+        os << "if (vt == MVT::f64) return TCE::MAXDddd;" << std::endl;
+    }
     os << "\treturn -1; " << std::endl << "}" << std::endl;
     
-    // MINU
+    // MINU: unsigned for integer, unordered for float
     os  << "int GeneratedTCEPlugin::getMinuOpcode(SDNode* n) const {" << std::endl;
     if (opNames_.find("MINUrrr") != opNames_.end()) {
         os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl;
         os << "if (vt == MVT::i32) return TCE::MINUrrr;" << std::endl;
     }
+    if (opNames_.find("MINUFfff") != opNames_.end()) {
+        os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl;
+        os << "if (vt == MVT::f32) return TCE::MINUFfff;" << std::endl;
+    }
+    if (opNames_.find("MINUDddd") != opNames_.end()) {
+        os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl;
+        os << "if (vt == MVT::f64) return TCE::MINUDddd;" << std::endl;
+    }
     os << "\treturn -1; " << std::endl << "}" << std::endl;
 
-    // MAXU
+    // MAXU: unsigned for integer, unordered for float
     os  << "int GeneratedTCEPlugin::getMaxuOpcode(SDNode* n) const {" << std::endl;
     if (opNames_.find("MAXUrrr") != opNames_.end()) {
         os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl;
         os << "if (vt == MVT::i32) return TCE::MAXUrrr;" << std::endl;
     }
+    if (opNames_.find("MAXUFfff") != opNames_.end()) {
+        os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl;
+        os << "if (vt == MVT::f32) return TCE::MAXUFfff;" << std::endl;
+    }
+    if (opNames_.find("MAXUDddd") != opNames_.end()) {
+        os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl;
+        os << "if (vt == MVT::f64) return TCE::MAXUDddd;" << std::endl;
+    }
     os << "\treturn -1; " << std::endl << "}" << std::endl;
 }
 
@@ -3372,6 +3559,7 @@ void TDGen::createShortExtLoadPatterns(std::ostream& os) {
            << "def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDHri ADDRri:$src)>;" << std::endl;
 
 }
+//RWH: need load patterns for 64b?
 
 void
 TDGen::writeCallingConv(std::ostream& os) {
@@ -3381,9 +3569,13 @@ TDGen::writeCallingConv(std::ostream& os) {
     os << "def RetCC_TCE : CallingConv<[" << std::endl
        << "  CCIfType<[i1], CCPromoteToType<i32>>," << std::endl
        << "  CCIfType<[i32], CCAssignToReg<[IRES0]>>," << std::endl
+       << "  CCIfType<[i64], CCAssignToReg<[DIRES0]>>," << std::endl
        << "  CCIfType<[f32], CCAssignToReg<[IRES0]>>," << std::endl
+       << "  CCIfType<[f64], CCAssignToReg<[DRES0]>>," << std::endl
        << "  CCAssignToStack<4, 4>" << std::endl
        << "]>;" << std::endl << std::endl;
+    //RWH: //TODO: need to figure out function return value second column for
+    // f64
 
     os << "// Function argument value types." << std::endl;
     os << 
@@ -3392,14 +3584,14 @@ TDGen::writeCallingConv(std::ostream& os) {
         "  CCIfType<[i32], CCAssignToReg<[IRES0]>>," << std::endl;
 
     os << 
-        "  // Integer values get stored in stack slots that are 4 bytes in "
+        "  // Integer and float32 values get stored in stack slots that are 4 bytes in "
        << std::endl <<
         "  // size and 4-byte aligned." << std::endl << 
         "  CCIfType<[i32, f32], CCAssignToStack<4, 4>>," << std::endl <<
-        "  // Integer values get stored in stack slots that are 8 bytes in"
+        "  // Long integer and float64 values get stored in stack slots that are 8 bytes in"
        << std::endl <<
         "  // size and 8-byte aligned." << std::endl << 
-        "  CCIfType<[f64], CCAssignToStack<8, 8>>" << std::endl << 
+        "  CCIfType<[i64, f64], CCAssignToStack<8, 8>>" << std::endl << 
 
         "]>;" << std::endl;
 }
@@ -3474,6 +3666,9 @@ void TDGen::createSelectPatterns(std::ostream& os) {
                << "(XORbbj R1Regs:$c, 1))>;"
                << std::endl << std::endl;
             
+            //RWH: not defining for fp64 due to the coctail of instructions
+            // necessary, esp. with immediates, which aren't supported by the
+            // current instruction patterns.
             os << "def : Pat<(f32 (select R1Regs:$c, R32FPRegs:$t,R32FPRegs:$f)),"
                << "(IORfff (ANDfff R32FPRegs:$t, (SUBfir 0, (ANDext R1Regs:$c, 1))),"
                << "(ANDfff R32FPRegs:$f, (SUBfri (ANDext R1Regs:$c,1),1)))>;"
@@ -3492,6 +3687,7 @@ void TDGen::createSelectPatterns(std::ostream& os) {
             opNames_["SELECT_I32ir"] = "CMOV_SELECT";
             opNames_["SELECT_I32ri"] = "CMOV_SELECT";
             opNames_["SELECT_I32ii"] = "CMOV_SELECT";
+            opNames_["SELECT_F64"] = "CMOV_SELECT";
             opNames_["SELECT_F32"] = "CMOV_SELECT";
             opNames_["SELECT_F16"] = "CMOV_SELECT";
 
@@ -3558,6 +3754,13 @@ void TDGen::createSelectPatterns(std::ostream& os) {
                << "(select R32IRegs:$c, (i32 imm:$T), (i32 imm:$F)))]>;"
                << std::endl << std::endl
                 
+               << "def SELECT_F64 : InstTCE<(outs R64FPRegs:$dst),"
+               << "(ins R1Regs:$c, R64FPRegs:$T, R64FPRegs:$F),"
+               << "\"# SELECT_F64 PSEUDO!\","
+               << "[(set R64FPRegs:$dst,"
+               << "(select R1Regs:$c, R64FPRegs:$T, R64FPRegs:$F))]>;"
+               << std::endl << std::endl
+                
                << "def SELECT_F32 : InstTCE<(outs R32FPRegs:$dst),"
                << "(ins R1Regs:$c, R32FPRegs:$T, R32FPRegs:$F),"
                << "\"# SELECT_F32 PSEUDO!\","
diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEDAGToDAGISel.cc b/tce/src/applibs/LLVMBackend/plugin/TCEDAGToDAGISel.cc
index a29efecfb1..1c79b429fa 100644
--- a/tce/src/applibs/LLVMBackend/plugin/TCEDAGToDAGISel.cc
+++ b/tce/src/applibs/LLVMBackend/plugin/TCEDAGToDAGISel.cc
@@ -200,7 +200,10 @@ TCEDAGToDAGISel::Select(SDNode* n) {
                         int opc;
                         ISD::CondCode cc = cast<CondCodeSDNode>(
                             node2->getOperand(2))->get();
-                        
+
+                        //TODO://RWH: review for i64/f64, looks like
+                        //getMinOpcode call should suffice if correct opcodes
+                        //are included in the machine
                         switch (cc) {
                         case ISD::SETLT:
                         case ISD::SETLE:
diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEFrameInfo.hh b/tce/src/applibs/LLVMBackend/plugin/TCEFrameInfo.hh
index bab5f79a4f..1af905d619 100644
--- a/tce/src/applibs/LLVMBackend/plugin/TCEFrameInfo.hh
+++ b/tce/src/applibs/LLVMBackend/plugin/TCEFrameInfo.hh
@@ -45,7 +45,7 @@ namespace llvm {
      * ON EVERY LLVM UPDATE CHECK THESE INTERFACES VERY CAREFULLY
      * FROM include/llvm/Target/TargetFrameInfo.h
      *
-     * Compiler doesn warn or give error if parameter lists are changed.
+     * Compiler doesn't warn or give error if parameter lists are changed.
      * Many times also base class implementation works, but does not do
      * very good job.
      */
diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc b/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc
index a438d6c234..02ede335fb 100644
--- a/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc
+++ b/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc
@@ -423,6 +423,8 @@ TCETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     } else if (sType == MVT::f16 || sType == MVT::f32) {
         ObjSize = 4;
         ValToStore = Val;
+        //TODO: inconsistent with argument counting code that allows f16 and
+        //f32 to be passed in registers.
     } else if (sType == MVT::i64 || sType == MVT::f64) {
         ObjSize = 8;
         ValToStore = Val;    // Whole thing is passed in memory.
@@ -469,7 +471,7 @@ TCETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Build a sequence of copy-to-reg nodes chained together with token
   // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag in necessary since all emited instructions must be
+  // The InFlag is necessary since all emited instructions must be
   // stuck together.
   SDValue InFlag;
 
@@ -523,8 +525,10 @@ TCETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Copy all of the result registers out of their specified physreg. (only one rv reg)
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
-      unsigned Reg = RVLocs[i].getLocReg();
-   
+    unsigned Reg = RVLocs[i].getLocReg();
+    MVT sType = RVLocs[i].getValVT().SimpleTy;
+
+    //TODO://RWH:: add code for handling f64 return values
     Chain = DAG.getCopyFromReg(Chain, dl, Reg,
                                RVLocs[i].getValVT(), InFlag).getValue(1);
     InFlag = Chain.getValue(2);
@@ -562,8 +566,9 @@ TCETargetLowering::TCETargetLowering(
 
     addRegisterClass(MVT::i1, &TCE::R1RegsRegClass);
     addRegisterClass(MVT::i32, &TCE::R32IRegsRegClass);
-    addRegisterClass(MVT::f32, &TCE::R32FPRegsRegClass);
     addRegisterClass(MVT::f16, &TCE::R32HFPRegsRegClass);
+    addRegisterClass(MVT::f32, &TCE::R32FPRegsRegClass);
+    addRegisterClass(MVT::f64, &TCE::R64FPRegsRegClass);
 
     if (opts->useVectorBackend()) {
         switch (tm_.maxVectorSize()) {
@@ -688,6 +693,7 @@ TCETargetLowering::TCETargetLowering(
     setOperationAction(ISD::BR_CC, MVT::i32, Expand);
     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
     setOperationAction(ISD::MULHU,  MVT::i32, Expand);
     setOperationAction(ISD::MULHS,  MVT::i32, Expand);
     setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@@ -719,10 +725,15 @@ TCETargetLowering::TCETargetLowering(
     setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 
     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 #ifdef LLVM_OLDER_THAN_3_6
     setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
 #else
     setLoadExtAction(ISD::EXTLOAD, MVT::f16, MVT::f32, Expand);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f16, MVT::f64, Expand);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f64, Expand);
 #endif
 
     setOperationAction(ISD::ADDE, MVT::i32, Expand);
@@ -732,7 +743,6 @@ TCETargetLowering::TCETargetLowering(
     setOperationAction(ISD::ADDE, MVT::i8, Expand);
     setOperationAction(ISD::ADDC, MVT::i8, Expand);
     
-
     setStackPointerRegisterToSaveRestore(TCE::SP);
 
     // Set missing operations that can be emulated with emulation function
@@ -1012,10 +1022,12 @@ TCETargetLowering::getRegForInlineAsmConstraint(
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     case 'r':
-        return std::make_pair(0U, &TCE::R32IRegsRegClass);
-    case 'f':
         if (VT == MVT::f32) {
             return std::make_pair(0U, &TCE::R32FPRegsRegClass);
+        } else if (VT == MVT::f64) {
+            return std::make_pair(0U, &TCE::R64FPRegsRegClass);
+        } else {
+            return std::make_pair(0U, &TCE::R32IRegsRegClass);
         }
     }
   }
@@ -1026,7 +1038,6 @@ TCETargetLowering::getRegForInlineAsmConstraint(
 #endif
 }
 
-
 std::vector<unsigned> TCETargetLowering::
 getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                   EVT VT) const {
diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.cc b/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.cc
index 10d22f5123..71fe8d7a68 100644
--- a/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.cc
+++ b/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.cc
@@ -270,6 +270,9 @@ void TCEInstrInfo::copyPhysReg(
     } else if (TCE::R32IRegsRegClass.contains(destReg, srcReg)) {
         BuildMI(mbb, mbbi, dl, get(TCE::MOVI32rr), destReg)
 	    .addReg(srcReg, getKillRegState(killSrc));
+    } else if (TCE::R64FPRegsRegClass.contains(destReg, srcReg)) {
+        BuildMI(mbb, mbbi, dl, get(TCE::MOVF64dd), destReg)
+	    .addReg(srcReg, getKillRegState(killSrc));
     } else if (TCE::R32FPRegsRegClass.contains(destReg, srcReg)) {
         BuildMI(mbb, mbbi, dl, get(TCE::MOVF32ff), destReg)
 	    .addReg(srcReg, getKillRegState(killSrc));
diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.td b/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.td
index abdbe87be3..09eddee760 100644
--- a/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.td
+++ b/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.td
@@ -39,6 +39,7 @@ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
 
 def TCEftoi  : SDNode<"TCEISD::FTOI", SDTTCEFTOI>;
 def TCEitof  : SDNode<"TCEISD::ITOF", SDTTCEITOF>;
+//RWH: need anything here for F64?
 
 // These are target-independent nodes, but have target-specific formats.
 def SDT_TCECallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
@@ -142,6 +143,9 @@ def PRED_FALSE_MOVI32ri : InstTCE<(outs R32IRegs:$dst),
     		       (ins R1Regs:$pred, i32imm:$src),
 	           	   "$src -> $dst;", []>;
 
+def MOVF64de : InstTCE<(outs R64FPRegs:$dst), (ins f64imm:$val),
+	           "$val -> $dst;",
+                   [(set R64FPRegs:$dst, (f64 fpimm:$val))]>;
 def MOVF32fk : InstTCE<(outs R32FPRegs:$dst), (ins f32imm:$val),
 	           "$val -> $dst;",
                    [(set R32FPRegs:$dst, (f32 fpimm:$val))]>;
@@ -151,6 +155,7 @@ def MOVF16hk : InstTCE<(outs R32HFPRegs:$dst), (ins f16imm:$val),
 }
 
 // TODO: are these really ever used for anything? propably not?
+// RWH: add a double version? doesn't fit in R32....
 let isReMaterializable = 1, isMoveImm = 1 in {
 def MOVF32fi : InstTCE<(outs R32FPRegs:$dst), (ins i32imm:$src),
 	           "$src -> $dst;",
@@ -211,6 +216,11 @@ def MOVF32ff : InstTCE<(outs R32FPRegs:$dst), (ins R32FPRegs:$src),
 	           "$src -> $dst;",
                    []>;
 
+//RWH: add moves to/from I32 or I64?
+def MOVF64dd : InstTCE<(outs R64FPRegs:$dst), (ins R64FPRegs:$src),
+	           "$src -> $dst;",
+                   []>;
+
 def MOVF16hh : InstTCE<(outs R32HFPRegs:$dst), (ins R32HFPRegs:$src),
 	           "$src -> $dst;",
                    []>;
@@ -337,6 +347,18 @@ def: Pat<(brind (i32 R32Regs:$dst)),
          (TCEBRIND R32Regs:$dst)>;
 
 // FP stores/loads
+    def LDDdr : InstTCE<(outs R64FPRegs:$op2),
+			(ins MEMrr:$op1), "", [(set R64FPRegs:$op2,
+						(load ADDRrr:$op1))]>;
+
+    def LDDdi : InstTCE<(outs R64FPRegs:$op2),
+			(ins MEMri:$op1), "", [(set R64FPRegs:$op2,
+						(load ADDRri:$op1))]>;
+
+    def STDdr : InstTCE<(outs), (ins MEMrr:$op1, R64FPRegs:$op2), "",
+			[(store R64FPRegs:$op2, ADDRrr:$op1)]>;
+    def STDdi : InstTCE<(outs), (ins MEMri:$op1, R64FPRegs:$op2), "",
+			[(store R64FPRegs:$op2, ADDRri:$op1)]>;
 
     def LDWfr : InstTCE<(outs R32FPRegs:$op2),
 			(ins MEMrr:$op1), "", [(set R32FPRegs:$op2,
@@ -723,7 +745,7 @@ def : Pat <(v8f32 (scalar_to_vector (f32 R32FPRegs:$e))),
         subvector2_0)),
     subvector4_0))>;
 
-// build done with inseet_subreg
+// build done with insert_subreg
 def : Pat <(v2i32 (build_vector (i32 R32IRegs:$e1), R32IRegs:$e2)),
     (v2i32 (INSERT_SUBREG
         (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
diff --git a/tce/src/applibs/LLVMBackend/plugin/TCETargetMachinePlugin.cc b/tce/src/applibs/LLVMBackend/plugin/TCETargetMachinePlugin.cc
index 06a2b7df4a..7c1ae3bc58 100644
--- a/tce/src/applibs/LLVMBackend/plugin/TCETargetMachinePlugin.cc
+++ b/tce/src/applibs/LLVMBackend/plugin/TCETargetMachinePlugin.cc
@@ -316,6 +316,12 @@ GeneratedTCEPlugin::operationName(unsigned opc) const {
     if (opc == TCE::MOVFI16rh) return MOVE;
     if (opc == TCE::MOVIF16hr) return MOVE;
     if (opc == TCE::MOVF16hi) return MOVE;
+    if (opc == TCE::MOVF64dd) return MOVE;
+    //if (opc == TCE::MOVF64d?) return MOVE;  //TODO: move from 64b integer immediate
+    if (opc == TCE::MOVF64de) return MOVE;
+    //if (opc == TCE::MOVI64I1rr) return MOVE;
+    //if (opc == TCE::MOVFI64?d) return MOVE; //TODO: move to 64b integer register
+    //if (opc == TCE::MOVIF64d?) return MOVE; //TODO: move from 64b integer register
 
     if (opc == TCE::INLINEASM) return INLINEASM;
 
diff --git a/tce/src/applibs/Scheduler/Algorithms/BBSchedulerController.hh b/tce/src/applibs/Scheduler/Algorithms/BBSchedulerController.hh
index 67863d251e..fcfa849178 100644
--- a/tce/src/applibs/Scheduler/Algorithms/BBSchedulerController.hh
+++ b/tce/src/applibs/Scheduler/Algorithms/BBSchedulerController.hh
@@ -57,7 +57,7 @@ namespace TTAProgram {
  * A class that control operation of a basic block scheduler.
  *
  * This class handles calling other optimization passes and tranferring data
- * for the actual BB scheduler and loop scheudlers etc.
+ * for the actual BB scheduler and loop schedulers etc.
  */
 class BBSchedulerController :
     public BasicBlockPass, public ControlFlowGraphPass, public ProcedurePass,
diff --git a/tce/src/applibs/Scheduler/Algorithms/BUBasicBlockScheduler.cc b/tce/src/applibs/Scheduler/Algorithms/BUBasicBlockScheduler.cc
index c3079c08ec..8cf2508c5f 100644
--- a/tce/src/applibs/Scheduler/Algorithms/BUBasicBlockScheduler.cc
+++ b/tce/src/applibs/Scheduler/Algorithms/BUBasicBlockScheduler.cc
@@ -737,7 +737,7 @@ BUBasicBlockScheduler::scheduleMove(
                 latestFromTrigger, ddg_->latestCycle(
                     moveNode, INT_MAX, true)); // TODO: 0 or INT_MAX
 
-            // rename if can and may alow scheuduling later.
+            // rename if can and may alow scheduling later.
             if (minRenamedEC > ddgCycle) {
                 minRenamedEC =  rm_->latestCycle(minRenamedEC, moveNode);
                 if (minRenamedEC > ddgCycle) {
diff --git a/tce/src/applibs/Scheduler/Algorithms/BasicBlockScheduler.cc b/tce/src/applibs/Scheduler/Algorithms/BasicBlockScheduler.cc
index 1ab82b5d8e..526aad4c62 100644
--- a/tce/src/applibs/Scheduler/Algorithms/BasicBlockScheduler.cc
+++ b/tce/src/applibs/Scheduler/Algorithms/BasicBlockScheduler.cc
@@ -816,7 +816,7 @@ BasicBlockScheduler::scheduleMove(
                 sourceReadyCycle, ddg_->earliestCycle(
                     moveNode, INT_MAX, true, true)); // TODO: 0 or INT_MAX
 
-            // rename if can and may alow scheuduling earlier.
+            // rename if can and may allow scheduling earlier.
             if (renamer_ != NULL && minRenamedEC < ddgCycle) {
                 minRenamedEC =  rm_->earliestCycle(minRenamedEC, moveNode);
                 if (minRenamedEC < ddgCycle) {
@@ -1051,7 +1051,7 @@ BasicBlockScheduler::scheduleInputOperandTempMoves(MoveNode& operandMove, MoveNo
 }
 
 /** 
- * Prints DDG to a dot file before and after scheudling
+ * Prints DDG to a dot file before and after scheduling
  * 
  * @param ddg to print
  * @param name operation name for ddg
diff --git a/tce/src/applibs/Scheduler/Algorithms/BypassingBUBasicBlockScheduler.cc b/tce/src/applibs/Scheduler/Algorithms/BypassingBUBasicBlockScheduler.cc
index d35aa97463..e0548c8300 100644
--- a/tce/src/applibs/Scheduler/Algorithms/BypassingBUBasicBlockScheduler.cc
+++ b/tce/src/applibs/Scheduler/Algorithms/BypassingBUBasicBlockScheduler.cc
@@ -629,7 +629,7 @@ BypassingBUBasicBlockScheduler::scheduleMoveBU(
         endCycle -= targetMachine_->controlUnit()->delaySlots();
     }
 
-    // if regcpy is unscheudled it must be rescheduled
+    // if regcpy is unscheduled it must be rescheduled
     if (t == TempRegAfter) {
         MoveNode* regCopyAfter = regCopiesAfter_[&mn];
         if (regCopyAfter != NULL && 
@@ -726,7 +726,7 @@ BypassingBUBasicBlockScheduler::scheduleMoveBU(
         scheduledMoves_.insert(&mn);
         std::cerr << "\t\tScheduled to cycle: " << rmCycle << std::endl;
 
-        // if regcpy is unscheudled it must be rescheduled
+        // if regcpy is unscheduled it must be rescheduled
         if (t == TempRegBefore) {
             MoveNode* regCopyBefore = regCopiesBefore_[&mn];
             if (regCopyBefore != NULL &&
diff --git a/tce/src/applibs/Scheduler/Algorithms/RegisterCopyAdder.cc b/tce/src/applibs/Scheduler/Algorithms/RegisterCopyAdder.cc
index 7c2f80c693..0c430c1b63 100644
--- a/tce/src/applibs/Scheduler/Algorithms/RegisterCopyAdder.cc
+++ b/tce/src/applibs/Scheduler/Algorithms/RegisterCopyAdder.cc
@@ -311,7 +311,7 @@ RegisterCopyAdder::addRegisterCopies(
  *
  * Returns 0 in case there is a connection already.
  *
- * @param originalMove The move that might not be unschedulable due to missing
+ * @param originalMove The move that might be unschedulable due to missing
  * connectivity. Will be modified to read from the temporary reg instead in
  * case connectivity is missing.
  * @param sourcePort The source port.
@@ -530,7 +530,7 @@ RegisterCopyAdder::addConnectionRegisterCopies(
        be in the ProgramOperation, i.e., an operation move. The original
        move should be either the last of the chain or the first, in case
        it's input or output move, respectively. In case of register move, 
-       the original move is considered the fisrt of the chain */
+       the original move is considered the first of the chain */
 
 
     TTAProgram::Terminal& omDest = originalMove.move().destination();
@@ -2061,7 +2061,7 @@ void RegisterCopyAdder::resultsScheduled(
 
 
 /** 
- * Find the temporary registers usef for reg copies
+ * Find the temporary registers used for reg copies
  */
 void
 RegisterCopyAdder::findTempRegisters(
diff --git a/tce/src/applibs/Scheduler/Algorithms/RegisterRenamer.cc b/tce/src/applibs/Scheduler/Algorithms/RegisterRenamer.cc
index 4d24ba0475..f157e44e84 100644
--- a/tce/src/applibs/Scheduler/Algorithms/RegisterRenamer.cc
+++ b/tce/src/applibs/Scheduler/Algorithms/RegisterRenamer.cc
@@ -55,7 +55,7 @@
 /**
  * Constructor.
  *
- * @param machine machine for which we are scheudling
+ * @param machine machine for which we are scheduling
  
 RegisterRenamer::RegisterRenamer(const TTAMachine::Machine& machine) :
     machine_(machine) {
@@ -66,7 +66,7 @@ RegisterRenamer::RegisterRenamer(const TTAMachine::Machine& machine) :
 /**
  * Constructor.
 *
- * @param machine machine for which we are scheudling
+ * @param machine machine for which we are scheduling
  */
 RegisterRenamer::RegisterRenamer(
     const TTAMachine::Machine& machine, TTAProgram::BasicBlock& bb) :
@@ -421,7 +421,7 @@ RegisterRenamer::renameDestinationRegister(
         node.move().destination().registerFile();
 
     // don't allow using same reg multiple times if loop scheduling.
-    // unscheudling would cause problems, missing war edges.
+    // unscheduling would cause problems, missing war edges.
     if (loopScheduling) {
         earliestCycle = -1;
     }
diff --git a/tce/src/applibs/Scheduler/ProgramRepresentations/DDG/DataDependenceGraph.cc b/tce/src/applibs/Scheduler/ProgramRepresentations/DDG/DataDependenceGraph.cc
index a9833d6f57..1cc7f00f8c 100644
--- a/tce/src/applibs/Scheduler/ProgramRepresentations/DDG/DataDependenceGraph.cc
+++ b/tce/src/applibs/Scheduler/ProgramRepresentations/DDG/DataDependenceGraph.cc
@@ -750,7 +750,7 @@ DataDependenceGraph::firstScheduledRegisterWrite(
 
 /**
  * Returns the highest cycle where accesses the given register.
- * If unscheudled moves accessing the register, returns INT_MAX;
+ * If unscheduled moves accessing the register, returns INT_MAX;
  * If none found, returns -1.
  *
  * @param rf The register file.
@@ -849,7 +849,7 @@ DataDependenceGraph::lastRegisterCycle(
 
 /**
  * Returns the lowest cycle where accesses the given register.
- * If unscheudled moves accessing the register, returns -1.
+ * If unscheduled moves accessing the register, returns -1.
  * If none found, return INT_MAX
  *
  * @param rf The register file.
diff --git a/tce/src/applibs/Scheduler/ResourceModel/ExecutionPipelineResource.cc b/tce/src/applibs/Scheduler/ResourceModel/ExecutionPipelineResource.cc
index f85fc9558b..9f2fb3d010 100644
--- a/tce/src/applibs/Scheduler/ResourceModel/ExecutionPipelineResource.cc
+++ b/tce/src/applibs/Scheduler/ResourceModel/ExecutionPipelineResource.cc
@@ -1526,13 +1526,13 @@ int ExecutionPipelineResource::resultReadyCycle(
  *
  * This checks that the guards are exclusive, and that the moves are
  * to be scheduled in same cycle (one already scheduled, on is going to
- * be scheudled to given cycle, which has to be the same.
+ * be scheduled to given cycle, which has to be the same.
  * the same cycle requirements makes sure the value of the guard cannot be
  * changed between the moves.
  *
  * @param mn1 movenode which has already been scheduled
  * @param mn2 move which we are going to schedule
- * @param cycle cycle where we are going to scheudle mn2.
+ * @param cycle cycle where we are going to schedule mn2.
  */
 bool ExecutionPipelineResource::exclusiveMoves(
     const MoveNode* mn1, const MoveNode* mn2, int cycle) const {
@@ -1817,7 +1817,7 @@ ExecutionPipelineResource::resultNotOverWritten(
         // no loop scheduling.
         ii = INT_MAX;
     } else {
-        // loop scheudling.
+        // loop scheduling.
         // make sure opeation does not cause conflict
         // with itself on next loop iteration.
         if (resultReadyCycle + (int)ii <= resultReadCycle) {
@@ -1958,7 +1958,7 @@ bool ExecutionPipelineResource::checkOperandAllowed(
 
                 // fail if the other operand happens eaelier than this (it has later usage).
                 
-                // loop scheudling, op overlaps
+                // loop scheduling, op overlaps
                 // need to also check that is not written before the use.
                 if (operandUseModCycle <
                     instructionIndex(mn.cycle())) {
diff --git a/tce/tools/patches/clang-3.5-64bit-doubles-not-forced-to-single.patch b/tce/tools/patches/clang-3.5-64bit-doubles-not-forced-to-single.patch
new file mode 100644
index 0000000000..3fb29b1a06
--- /dev/null
+++ b/tce/tools/patches/clang-3.5-64bit-doubles-not-forced-to-single.patch
@@ -0,0 +1,30 @@
+This patch allows the TCE target machine to use 64bit doubles instead of 
+forcing them to 32bit (single precision "float").
+
+Index: lib/Basic/Targets.cpp
+===================================================================
+--- tools/clang/lib/Basic/Targets.cpp	(revision 222960)
++++ tools/clang/lib/Basic/Targets.cpp	(working copy)
+@@ -5219,15 +5219,15 @@
+       PtrDiffType = SignedInt;
+       FloatWidth = 32;
+       FloatAlign = 32;
+-      DoubleWidth = 32;
+-      DoubleAlign = 32;
+-      LongDoubleWidth = 32;
+-      LongDoubleAlign = 32;
++      DoubleWidth = 64;
++      DoubleAlign = 64;
++      LongDoubleWidth = 64;
++      LongDoubleAlign = 64;
+       FloatFormat = &llvm::APFloat::IEEEsingle;
+-      DoubleFormat = &llvm::APFloat::IEEEsingle;
+-      LongDoubleFormat = &llvm::APFloat::IEEEsingle;
++      DoubleFormat = &llvm::APFloat::IEEEdouble;
++      LongDoubleFormat = &llvm::APFloat::IEEEdouble;
+       DescriptionString = "E-p:32:32-i8:8:32-i16:16:32-i64:32"
+-                          "-f64:32-v64:32-v128:32-a:0:32-n32";
++                          "-f64:64:64-v64:32-v128:32-a:0:32-n32";
+       AddrSpaceMap = &TCEOpenCLAddrSpaceMap;
+       UseAddrSpaceMapMangling = true;
+     }
diff --git a/tce/tools/patches/clang-3.7-64bit-doubles-not-forced-to-single.patch b/tce/tools/patches/clang-3.7-64bit-doubles-not-forced-to-single.patch
new file mode 100644
index 0000000000..70d9f3e92c
--- /dev/null
+++ b/tce/tools/patches/clang-3.7-64bit-doubles-not-forced-to-single.patch
@@ -0,0 +1,32 @@
+This patch allows the TCE target machine to use 64bit doubles instead of 
+forcing them to 32bit (single precision "float").
+
+Index: lib/Basic/Targets.cpp
+===================================================================
+--- tools/clang/lib/Basic/Targets.cpp	(revision 222960)
++++ tools/clang/lib/Basic/Targets.cpp	(working copy)
+@@ -5973,17 +5973,17 @@
+       PtrDiffType = SignedInt;
+       FloatWidth = 32;
+       FloatAlign = 32;
+-      DoubleWidth = 32;
+-      DoubleAlign = 32;
+-      LongDoubleWidth = 32;
+-      LongDoubleAlign = 32;
++      DoubleWidth = 64;
++      DoubleAlign = 64;
++      LongDoubleWidth = 64;
++      LongDoubleAlign = 64;
+       FloatFormat = &llvm::APFloat::IEEEsingle;
+-      DoubleFormat = &llvm::APFloat::IEEEsingle;
+-      LongDoubleFormat = &llvm::APFloat::IEEEsingle;
++      DoubleFormat = &llvm::APFloat::IEEEdouble;
++      LongDoubleFormat = &llvm::APFloat::IEEEdouble;
+       DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:32-"
+           "i16:16:32-i32:32:32-i64:32:32-"
+-          "f32:32:32-f64:32:32-v64:32:32-"
++          "f32:32:32-f64:64:64-v64:32:32-"
+           "v128:32:32-v256:32:32-v512:32:32-v1024:32:32-a0:0:32-n32";
+       AddrSpaceMap = &TCEOpenCLAddrSpaceMap;
+       UseAddrSpaceMapMangling = true;
+     }
diff --git a/tce/tools/scripts/install_llvm_3.5.sh b/tce/tools/scripts/install_llvm_3.5.sh
index 936b3dc759..7d6a9bf0a0 100755
--- a/tce/tools/scripts/install_llvm_3.5.sh
+++ b/tce/tools/scripts/install_llvm_3.5.sh
@@ -55,10 +55,15 @@ fi
 
 cd ../../$llvm_co_dir
 
-##### Add patches here.
-# ...
-#####
+# apply patches
+patch -Np0 < $patch_dir/clang-3.5-64bit-doubles-not-forced-to-single.patch
 
 ./configure $LLVM_BUILD_MODE --enable-shared --prefix=$TARGET_DIR || eexit "Configuring LLVM/Clang failed."
 make -j2 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed."
-make install || eexit "Installation of LLVM/Clang failed."
+if -w $TARGET_DIR 
+then
+    make install || eexit "Installation of LLVM/Clang failed."
+else
+    echo "Installation directory not writable, enter 'sudo' password or cancel and install manually."
+    sudo make install || eexit "Installation of LLVM/Clang failed."
+fi
diff --git a/tce/tools/scripts/install_llvm_3.7.sh b/tce/tools/scripts/install_llvm_3.7.sh
index 5176608da0..a7f2d2f29b 100755
--- a/tce/tools/scripts/install_llvm_3.7.sh
+++ b/tce/tools/scripts/install_llvm_3.7.sh
@@ -61,10 +61,18 @@ patch -Np0 < $patch_dir/llvm-3.7-tce.patch
 patch -Np0 < $patch_dir/llvm-3.7-tcele.patch
 patch -Np0 < $patch_dir/llvm-3.7-memcpyoptimizer-only-on-default-as.patch
 patch -Np0 < $patch_dir/llvm-3.5-loopidiomrecognize-only-on-default-as.patch
+patch -Np0 < $patch_dir/clang-3.7-64bit-doubles-not-forced-to-single.patch
 #####
 
 mkdir -p build
 cd build
 ../configure $LLVM_BUILD_MODE --enable-bindings=none --enable-shared --prefix=$TARGET_DIR || eexit "Configuring LLVM/Clang failed."
-make -j2 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed."
-make install || eexit "Installation of LLVM/Clang failed."
+make -j4 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed."
+mkdir -p "$TARGET_DIR"
+if [ -w "$TARGET_DIR" ]; then
+    make install || eexit "Installation of LLVM/Clang failed."
+else
+    echo "Installation directory not writable, enter 'sudo' password or cancel and install manually."
+    sudo mkdir -p "$TARGET_DIR"
+    sudo make install || eexit "Installation of LLVM/Clang failed."
+fi
diff --git a/testsuite/systemtest/bintools/Compiler/data/doubles.adf b/testsuite/systemtest/bintools/Compiler/data/doubles.adf
new file mode 100644
index 0000000000..5b1db08c79
--- /dev/null
+++ b/testsuite/systemtest/bintools/Compiler/data/doubles.adf
@@ -0,0 +1,1307 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
+<adf version="1.7">
+
+  <bus name="B1">
+    <width>32</width>
+    <guard>
+      <always-true/>
+    </guard>
+    <guard>
+      <simple-expr>
+        <bool>
+          <name>BOOL</name>
+          <index>0</index>
+        </bool>
+      </simple-expr>
+    </guard>
+    <guard>
+      <inverted-expr>
+        <bool>
+          <name>BOOL</name>
+          <index>0</index>
+        </bool>
+      </inverted-expr>
+    </guard>
+    <guard>
+      <simple-expr>
+        <bool>
+          <name>BOOL</name>
+          <index>1</index>
+        </bool>
+      </simple-expr>
+    </guard>
+    <guard>
+      <inverted-expr>
+        <bool>
+          <name>BOOL</name>
+          <index>1</index>
+        </bool>
+      </inverted-expr>
+    </guard>
+    <segment name="seg1">
+      <writes-to/>
+    </segment>
+    <short-immediate>
+      <extension>zero</extension>
+      <width>32</width>
+    </short-immediate>
+  </bus>
+
+  <bus name="B64">
+    <width>64</width>
+    <guard>
+      <always-true/>
+    </guard>
+    <guard>
+      <simple-expr>
+        <bool>
+          <name>BOOL</name>
+          <index>0</index>
+        </bool>
+      </simple-expr>
+    </guard>
+    <guard>
+      <inverted-expr>
+        <bool>
+          <name>BOOL</name>
+          <index>0</index>
+        </bool>
+      </inverted-expr>
+    </guard>
+    <guard>
+      <simple-expr>
+        <bool>
+          <name>BOOL</name>
+          <index>1</index>
+        </bool>
+      </simple-expr>
+    </guard>
+    <guard>
+      <inverted-expr>
+        <bool>
+          <name>BOOL</name>
+          <index>1</index>
+        </bool>
+      </inverted-expr>
+    </guard>
+    <segment name="seg1">
+      <writes-to/>
+    </segment>
+    <short-immediate>
+      <extension>zero</extension>
+      <width>0</width>
+    </short-immediate>
+  </bus>
+
+  <socket name="lsu_i1">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="lsu_o1">
+    <writes-to>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </writes-to>
+  </socket>
+
+  <socket name="lsu_i2">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="RF_i1">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="RF_o1">
+    <writes-to>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </writes-to>
+  </socket>
+
+  <socket name="bool_i1">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+    <reads-from>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="bool_o1">
+    <writes-to>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </writes-to>
+    <writes-to>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </writes-to>
+  </socket>
+
+  <socket name="gcu_i1">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="gcu_i2">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="gcu_o1">
+    <writes-to>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </writes-to>
+  </socket>
+
+  <socket name="ALU_i1">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="ALU_i2">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="ALU_o1">
+    <writes-to>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </writes-to>
+  </socket>
+
+  <socket name="FPU_i1">
+    <reads-from>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="FPU_i2">
+    <reads-from>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="FPU_o1">
+    <writes-to>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </writes-to>
+  </socket>
+
+  <socket name="LSDU_i1">
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="LSDU_i2">
+    <reads-from>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <socket name="LSDU_o1">
+    <writes-to>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </writes-to>
+  </socket>
+
+  <socket name="RF64_o1">
+    <writes-to>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </writes-to>
+    <writes-to>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </writes-to>
+  </socket>
+
+  <socket name="RF64_i1">
+    <reads-from>
+      <bus>B64</bus>
+      <segment>seg1</segment>
+    </reads-from>
+    <reads-from>
+      <bus>B1</bus>
+      <segment>seg1</segment>
+    </reads-from>
+  </socket>
+
+  <function-unit name="LSU">
+    <port name="in1t">
+      <connects-to>lsu_i1</connects-to>
+      <width>32</width>
+      <triggers/>
+      <sets-opcode/>
+    </port>
+    <port name="out1">
+      <connects-to>lsu_o1</connects-to>
+      <width>32</width>
+    </port>
+    <port name="in2">
+      <connects-to>lsu_i2</connects-to>
+      <width>32</width>
+    </port>
+    <operation>
+      <name>ldw</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>2</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ldq</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>2</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ldh</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>2</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>stw</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>stq</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>sth</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ldqu</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>2</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ldhu</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>2</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <address-space>data</address-space>
+  </function-unit>
+
+  <function-unit name="ALU">
+    <port name="in1t">
+      <connects-to>ALU_i1</connects-to>
+      <width>32</width>
+      <triggers/>
+      <sets-opcode/>
+    </port>
+    <port name="in2">
+      <connects-to>ALU_i2</connects-to>
+      <width>32</width>
+    </port>
+    <port name="out1">
+      <connects-to>ALU_o1</connects-to>
+      <width>32</width>
+    </port>
+    <operation>
+      <name>add</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>and</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>eq</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>gt</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>gtu</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ior</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>shl</name>
+      <bind name="1">in2</bind>
+      <bind name="2">in1t</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>shr</name>
+      <bind name="1">in2</bind>
+      <bind name="2">in1t</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>shru</name>
+      <bind name="1">in2</bind>
+      <bind name="2">in1t</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>sub</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>sxhw</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>sxqw</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>xor</name>
+      <bind name="1">in1t</bind>
+      <bind name="2">in2</bind>
+      <bind name="3">out1</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <address-space/>
+  </function-unit>
+
+  <function-unit name="FPU">
+    <port name="i1">
+      <connects-to>FPU_i1</connects-to>
+      <width>64</width>
+    </port>
+    <port name="i2">
+      <connects-to>FPU_i2</connects-to>
+      <width>64</width>
+      <triggers/>
+      <sets-opcode/>
+    </port>
+    <port name="out">
+      <connects-to>FPU_o1</connects-to>
+      <width>64</width>
+    </port>
+    <operation>
+      <name>absd</name>
+      <bind name="1">i2</bind>
+      <bind name="2">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>addd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>divd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>eqd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>equd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ged</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>geud</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>gtd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>gtud</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>invsqrtd</name>
+      <bind name="1">i2</bind>
+      <bind name="2">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>led</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>leud</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ltd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ltud</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>maxd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>mind</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>muld</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ned</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>negd</name>
+      <bind name="1">i2</bind>
+      <bind name="2">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>neud</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>ordd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>sqrtd</name>
+      <bind name="1">i2</bind>
+      <bind name="2">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>squared</name>
+      <bind name="1">i2</bind>
+      <bind name="2">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>subd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>uordd</name>
+      <bind name="1">i1</bind>
+      <bind name="2">i2</bind>
+      <bind name="3">out</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="3">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <address-space/>
+  </function-unit>
+
+  <function-unit name="DLSU">
+    <port name="adr">
+      <connects-to>LSDU_i1</connects-to>
+      <width>32</width>
+      <triggers/>
+      <sets-opcode/>
+    </port>
+    <port name="di">
+      <connects-to>LSDU_i2</connects-to>
+      <width>64</width>
+    </port>
+    <port name="do">
+      <connects-to>LSDU_o1</connects-to>
+      <width>64</width>
+    </port>
+    <operation>
+      <name>ldd</name>
+      <bind name="1">adr</bind>
+      <bind name="2">do</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <writes name="2">
+          <start-cycle>2</start-cycle>
+          <cycles>1</cycles>
+        </writes>
+      </pipeline>
+    </operation>
+    <operation>
+      <name>std</name>
+      <bind name="1">adr</bind>
+      <bind name="2">di</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+        <reads name="2">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+      </pipeline>
+    </operation>
+    <address-space>data</address-space>
+  </function-unit>
+
+  <register-file name="RF">
+    <type>normal</type>
+    <size>8</size>
+    <width>32</width>
+    <max-reads>1</max-reads>
+    <max-writes>1</max-writes>
+    <port name="wr">
+      <connects-to>RF_i1</connects-to>
+    </port>
+    <port name="rd">
+      <connects-to>RF_o1</connects-to>
+    </port>
+  </register-file>
+
+  <register-file name="BOOL">
+    <type>normal</type>
+    <size>2</size>
+    <width>1</width>
+    <max-reads>1</max-reads>
+    <max-writes>1</max-writes>
+    <port name="wr">
+      <connects-to>bool_i1</connects-to>
+    </port>
+    <port name="rd">
+      <connects-to>bool_o1</connects-to>
+    </port>
+  </register-file>
+
+  <register-file name="RF64">
+    <type>normal</type>
+    <size>8</size>
+    <width>64</width>
+    <max-reads>0</max-reads>
+    <max-writes>0</max-writes>
+    <port name="rd">
+      <connects-to>RF64_o1</connects-to>
+    </port>
+    <port name="wr">
+      <connects-to>RF64_i1</connects-to>
+    </port>
+  </register-file>
+
+  <address-space name="data">
+    <width>8</width>
+    <min-address>0</min-address>
+    <max-address>65535</max-address>
+    <numerical-id>0</numerical-id>
+  </address-space>
+
+  <address-space name="instructions">
+    <width>8</width>
+    <min-address>0</min-address>
+    <max-address>65535</max-address>
+  </address-space>
+
+  <address-space name="data64">
+    <width>8</width>
+    <min-address>0</min-address>
+    <max-address>65535</max-address>
+    <shared-memory>0</shared-memory>
+    <numerical-id>1</numerical-id>
+  </address-space>
+
+  <global-control-unit name="gcu">
+    <port name="pc">
+      <connects-to>gcu_i1</connects-to>
+      <width>32</width>
+      <triggers/>
+      <sets-opcode/>
+    </port>
+    <special-port name="ra">
+      <connects-to>gcu_i2</connects-to>
+      <connects-to>gcu_o1</connects-to>
+      <width>32</width>
+    </special-port>
+    <return-address>ra</return-address>
+    <ctrl-operation>
+      <name>jump</name>
+      <bind name="1">pc</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+      </pipeline>
+    </ctrl-operation>
+    <ctrl-operation>
+      <name>call</name>
+      <bind name="1">pc</bind>
+      <pipeline>
+        <reads name="1">
+          <start-cycle>0</start-cycle>
+          <cycles>1</cycles>
+        </reads>
+      </pipeline>
+    </ctrl-operation>
+    <address-space>instructions</address-space>
+    <delay-slots>3</delay-slots>
+    <guard-latency>1</guard-latency>
+  </global-control-unit>
+
+</adf>
diff --git a/testsuite/systemtest/bintools/Compiler/data/doubles.cc b/testsuite/systemtest/bintools/Compiler/data/doubles.cc
new file mode 100644
index 0000000000..8d7eebff0b
--- /dev/null
+++ b/testsuite/systemtest/bintools/Compiler/data/doubles.cc
@@ -0,0 +1,129 @@
+/* 
+ * Test code for double-precision operations/operands.
+ */
+
+//#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include "tceops.h"
+extern "C" {
+#include <lwpr.h>
+}
+
+// this really belongs in tce/bclib/include/lwpr.h
+void lwpr_print_str(const char* str)
+{
+    lwpr_print_str(const_cast<char*>(str));
+}
+
+
+// check storing double data to DATA section
+volatile double data64[16] = { 1.0, -2.0,  2.0/3.0, 10.0, 
+                              11.0, 12.0, 12.0+5.0,  0.9, //tanh(0.9), 
+                               1.2,  3.1, -4.0-2.0,  4.4, //atan2(-4.0, 3.0), 
+                               2.3,  4.5,  1.1*4.9,  4.5};//pow(1.3, 4.5)};
+
+
+// helper function until we can get fabs sorted out
+//#define my_fabs(val)  (((val)>=0) ? (val) : (-(val)))
+//inline double my_fabs(double val)
+//{
+//    if (val >= 0.0)
+//        return val;
+//    else
+//        return -val;
+//}
+double my_fabs(double val)
+{
+    //return std::abs(val);
+    double abs_val;
+    _TCE_ABSD(val, abs_val);
+    return abs_val;
+}
+
+// helper function, check for approximate equality, update success
+void check_approx_eql(int &succ, double desired, double actual, const char *msg, double tol=5e-15)
+{
+    // gentle variation from absolute tolerance at actual==0 to |actual|==1.0
+    double abs_tol;
+    if (my_fabs(actual) < 1.0) {
+        // hybrid absolute/relative tolerance: absolute tolerance of tol/2 at
+        // actual==0, increases quadratically to tol*actual (with the right
+        // derivative) at |actual|==1.0
+        abs_tol = 0.5*tol * (1.0 + actual * actual);
+    } else {
+        // typical relative error measure, i.e. the absolute tolerance is
+        // proportional to the actual value
+        abs_tol = my_fabs(tol * actual);
+    }
+
+    // tolerance test
+    if (my_fabs(desired - actual) > abs_tol) {
+        lwpr_print_str(msg);
+        succ = 0;
+    }
+}
+
+
+// main test function
+int main()
+{
+    int succ = 1;
+    double a, b;
+
+    // quick, simple tests
+    check_approx_eql(succ, 1.567, my_fabs(-1.567), "fabs (absd) failed");
+
+//future:    // test that memory stores are correct
+//future:    a = 2.0;
+//future:    b = 3.0;
+//future:    check_approx_eql(succ, a/b, data64[2], "stored divide result failed");
+//future:    a = 12.0;
+//future:    b = 5.0;
+//future:    check_approx_eql(succ, a+b, data64[6], "stored add result failed");
+//future:    a = -4.0;
+//future:    b = 2.0;
+//future:    check_approx_eql(succ, a-b, data64[10], "stored subtract result failed");
+//future:    a = 1.1;
+//future:    b = 4.9;
+//future:    check_approx_eql(succ, a*b, data64[14], "stored multiply result failed");
+//future:
+//future:    a = data64[14];
+//future:    b = data64[15];
+//future:    
+//future:    a = 1.4;
+//future:
+//future:    data64[2] = a + b;
+//future:    data64[3] = a - b;
+//future:    data64[4] = a * b;
+//future:    data64[5] = a / b;
+
+
+    return !succ;
+}
+
+
+/* FPU opcode list:
+ *   absd 
+ *   negd
+ *   addd
+ *   subd
+ *   muld
+ *   divd
+ *   squared
+ *   sqrtd
+ *   invsqrtd
+ *   eqd   equd
+ *   ged   geud
+ *   gtd   gtud
+ *   led   leud
+ *   ltd   ltud
+ *   ned   neud
+ *   ordd  uordd
+ *
+ * FPU doesn't contain LDD/STD or MACD.  The LSDU handles LDD/STD.
+ *
+ * Found instruction MSUD, not in FPU, not sure what it does.  ..UD is probably
+ * "unordered double".  Tried to add it anyway, ProDe says I have the wrong
+ * number/type of operands or something like that.
+ */
diff --git a/testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh b/testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh
new file mode 100755
index 0000000000..4ba4b4a8e5
--- /dev/null
+++ b/testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+### TCE TESTCASE
+### title: Test double-precision operations and operands
+
+#old:mach=data/multi_addrspace.adf
+#old:src=data/multi_addrspace.c
+mach=data/doubles.adf
+src=data/doubles.cc
+program=$(mktemp tmpXXXXXX)
+
+#set -x
+tcecc -a $mach -O3 $src -o $program && \
+ttasim -a $mach -p $program --no-debugmode
+
+rm -f $program
+