From fa47ca07bc87c41e15bd847be8938fde6a4aaa2b Mon Sep 17 00:00:00 2001 From: "Xiaoming (Jason) Cui" Date: Wed, 21 Mar 2018 10:29:27 -0700 Subject: [PATCH 1/3] removed source_reverse from wmt16_gnmt_8_layer.json, since this hparam has been removed from NMT code, and it caused training and inference failures when using this .json file --- nmt/standard_hparams/wmt16_gnmt_8_layer.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nmt/standard_hparams/wmt16_gnmt_8_layer.json b/nmt/standard_hparams/wmt16_gnmt_8_layer.json index 438ddcf55..da2034ca7 100644 --- a/nmt/standard_hparams/wmt16_gnmt_8_layer.json +++ b/nmt/standard_hparams/wmt16_gnmt_8_layer.json @@ -22,7 +22,6 @@ "share_vocab": false, "subword_option": "bpe", "sos": "", - "source_reverse": false, "src_max_len": 50, "src_max_len_infer": null, "steps_per_external_eval": null, From 8c3a240f3ff3ef707637d026dc52d63a2f4ae744 Mon Sep 17 00:00:00 2001 From: "Xiaoming (Jason) Cui" Date: Tue, 3 Apr 2018 00:36:36 -0700 Subject: [PATCH 2/3] add command line option to control the num_inter_threads and num_intra_threads for inference session --- nmt/inference.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/nmt/inference.py b/nmt/inference.py index 6f589337a..cf7924b5d 100644 --- a/nmt/inference.py +++ b/nmt/inference.py @@ -131,7 +131,10 @@ def single_worker_inference(infer_model, infer_data = load_data(inference_input_file, hparams) with tf.Session( - graph=infer_model.graph, config=utils.get_config_proto()) as sess: + graph=infer_model.graph, config=utils.get_config_proto( + num_intra_threads=hparams.num_intra_threads, + num_inter_threads=hparams.num_inter_threads + )) as sess: loaded_infer_model = model_helper.load_model( infer_model.model, ckpt, sess, "infer") sess.run( @@ -190,7 +193,10 @@ def multi_worker_inference(infer_model, infer_data = infer_data[start_position:end_position] with tf.Session( - graph=infer_model.graph, config=utils.get_config_proto()) as sess: + 
graph=infer_model.graph, config=utils.get_config_proto( + num_intra_threads=hparams.num_intra_threads, + num_inter_threads=hparams.num_inter_threads + )) as sess: loaded_infer_model = model_helper.load_model( infer_model.model, ckpt, sess, "infer") sess.run(infer_model.iterator.initializer, From a8026c0541c774e93f64d79a80ef8419aae9cce1 Mon Sep 17 00:00:00 2001 From: "Xiaoming (Jason) Cui" Date: Thu, 20 Dec 2018 09:58:38 -0800 Subject: [PATCH 3/3] Fixed a bug in the CPU parallelism of the model. When the tf.Session() call starts with no parameters, the interop and intraop parallelism settings will use default values and cannot be changed later, which will lead to lower performance if the model is running on CPU, because no optimized parallelism config is set --- nmt/nmt.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/nmt/nmt.py b/nmt/nmt.py index f5823d893..fce143bf8 100644 --- a/nmt/nmt.py +++ b/nmt/nmt.py @@ -620,10 +620,6 @@ def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""): num_workers = flags.num_workers utils.print_out("# Job id %d" % jobid) - # GPU device - utils.print_out( - "# Devices visible to TensorFlow: %s" % repr(tf.Session().list_devices())) - # Random random_seed = flags.random_seed if random_seed is not None and random_seed > 0: @@ -653,6 +649,14 @@ def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""): out_dir, default_hparams, flags.hparams_path, save_hparams=(jobid == 0)) + # GPU device + config_proto = utils.get_config_proto( + allow_soft_placement=True, + num_intra_threads=hparams.num_intra_threads, + num_inter_threads=hparams.num_inter_threads) + utils.print_out( + "# Devices visible to TensorFlow: %s" % repr(tf.Session(config=config_proto).list_devices())) + ## Train / Decode if flags.inference_input_file: # Inference output directory