Skip to content

Commit d227cbd

Browse files
helinwangfacaiy
authored and committed
Implement Addons>ParseTime operator. (#530)
The parse time operator parses an input string according to the provided format string into a Unix time, the number of seconds / milliseconds / microseconds / nanoseconds elapsed since January 1, 1970 UTC. Fixes: #492
1 parent d2f7db2 commit d227cbd

File tree

7 files changed

+376
-1
lines changed

7 files changed

+376
-1
lines changed

tensorflow_addons/custom_ops/text/BUILD

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,21 @@ cc_binary(
2121
"@local_config_tf//:tf_header_lib",
2222
],
2323
)
24+
25+
# Shared library for the ParseTime custom op. It is built from the kernel
# source (cc/kernels/parse_time_kernel.cc) and the op registration source
# (cc/ops/parse_time_op.cc) and is linked as a shared object (linkshared = 1)
# against the TensorFlow framework library and headers.
cc_binary(
26+
name = "_parse_time_op.so",
27+
srcs = [
28+
"cc/kernels/parse_time_kernel.cc",
29+
"cc/ops/parse_time_op.cc",
30+
],
31+
copts = [
32+
"-pthread",
33+
"-std=c++11",
34+
D_GLIBCXX_USE_CXX11_ABI,
35+
],
36+
linkshared = 1,
37+
deps = [
38+
"@local_config_tf//:libtensorflow_framework",
39+
"@local_config_tf//:tf_header_lib",
40+
],
41+
)
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include <string>
17+
18+
#include "absl/time/time.h"
19+
#include "tensorflow/core/framework/op_kernel.h"
20+
21+
namespace tensorflow {
22+
namespace addons {
23+
24+
using ::tensorflow::OpKernel;
25+
using ::tensorflow::OpKernelConstruction;
26+
using ::tensorflow::OpKernelContext;
27+
using ::tensorflow::Tensor;
28+
using ::tensorflow::tstring;
29+
30+
// Unit in which ParseTimeOp reports the parsed Unix time.
enum OutputUnit {
  SECOND = 1,
  MILLISECOND = 2,
  MICROSECOND = 3,
  NANOSECOND = 4,
};

// Converts the textual value of the `output_unit` attribute into an
// OutputUnit.
//
// Args:
//   output_unit_str: the unit name; must match one of "SECOND",
//     "MILLISECOND", "MICROSECOND" or "NANOSECOND" exactly (case-sensitive).
//   output_unit: receives the parsed unit. Left untouched when the name is
//     not recognized.
//
// Returns:
//   true if `output_unit_str` named a known unit, false otherwise.
bool OutputUnitFromString(const std::string& output_unit_str,
                          OutputUnit* output_unit) {
  // Taken by const reference: the name is only compared, never modified, so
  // there is no reason to copy it on every call.
  if (output_unit_str == "SECOND") {
    *output_unit = SECOND;
  } else if (output_unit_str == "MILLISECOND") {
    *output_unit = MILLISECOND;
  } else if (output_unit_str == "MICROSECOND") {
    *output_unit = MICROSECOND;
  } else if (output_unit_str == "NANOSECOND") {
    *output_unit = NANOSECOND;
  } else {
    return false;
  }
  return true;
}
51+
52+
class ParseTimeOp : public OpKernel {
53+
public:
54+
explicit ParseTimeOp(OpKernelConstruction* context) : OpKernel(context) {
55+
string output_unit_str;
56+
OP_REQUIRES_OK(context, context->GetAttr("time_format", &time_format_));
57+
OP_REQUIRES_OK(context, context->GetAttr("output_unit", &output_unit_str));
58+
OP_REQUIRES(context, OutputUnitFromString(output_unit_str, &output_unit_),
59+
errors::InvalidArgument("Invalid output unit"));
60+
}
61+
62+
void Compute(OpKernelContext* context) override {
63+
const Tensor& input_tensor = context->input(0);
64+
auto input = input_tensor.flat<tstring>();
65+
66+
Tensor* output_tensor = nullptr;
67+
OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
68+
&output_tensor));
69+
70+
auto output_flat = output_tensor->flat<int64>();
71+
const int n = input.size();
72+
for (int i = 0; i < n; ++i) {
73+
absl::Time time;
74+
std::string err;
75+
OP_REQUIRES(context, absl::ParseTime(time_format_, input(i), &time, &err),
76+
errors::InvalidArgument("Parse time failed: ", err));
77+
switch (output_unit_) {
78+
case SECOND:
79+
output_flat(i) = absl::ToUnixSeconds(time);
80+
break;
81+
case MILLISECOND:
82+
output_flat(i) = absl::ToUnixMillis(time);
83+
break;
84+
case MICROSECOND:
85+
output_flat(i) = absl::ToUnixMicros(time);
86+
break;
87+
case NANOSECOND:
88+
output_flat(i) = absl::ToUnixNanos(time);
89+
break;
90+
}
91+
}
92+
}
93+
94+
private:
95+
std::string time_format_;
96+
OutputUnit output_unit_;
97+
};
98+
99+
// Registers ParseTimeOp as the CPU kernel for the "Addons>ParseTime" op.
REGISTER_KERNEL_BUILDER(Name("Addons>ParseTime").Device(tensorflow::DEVICE_CPU),
100+
ParseTimeOp);
101+
102+
} // end namespace addons
103+
} // end namespace tensorflow
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "tensorflow/core/framework/common_shape_fns.h"
17+
#include "tensorflow/core/framework/op.h"
18+
19+
namespace tensorflow {
20+
namespace addons {
21+
// Declares the "Addons>ParseTime" op interface: one string input
// (`time_string`), one int64 output (`time_int64`), a string attribute
// `time_format`, and an `output_unit` attribute restricted to the four listed
// unit names. Shape inference is delegated to UnchangedShape.
REGISTER_OP("Addons>ParseTime")
22+
.Input("time_string: string")
23+
.Output("time_int64: int64")
24+
.Attr("time_format: string")
25+
.Attr("output_unit: {'SECOND', 'MILLISECOND', 'MICROSECOND', 'NANOSECOND'}")
26+
.SetShapeFn(tensorflow::shape_inference::UnchangedShape)
27+
.Doc(R"doc(
28+
Parse an input string according to the provided format string into a Unix time,
29+
the number of seconds / milliseconds / microseconds / nanoseconds elapsed since
30+
January 1, 1970 UTC.
31+
32+
Uses strftime()-like formatting options, with the same extensions as
33+
FormatTime(), but with the exceptions that %E#S is interpreted as %E*S, and %E#f
34+
as %E*f. %Ez and %E*z also accept the same inputs.
35+
36+
%Y consumes as many numeric characters as it can, so the matching data should
37+
always be terminated with a non-numeric. %E4Y always consumes exactly four
38+
characters, including any sign.
39+
40+
Unspecified fields are taken from the default date and time of ...
41+
42+
"1970-01-01 00:00:00.0 +0000"
43+
44+
For example, parsing a string of "15:45" (%H:%M) will return an Unix time that
45+
represents "1970-01-01 15:45:00.0 +0000".
46+
47+
Note that ParseTime only heeds the fields year, month, day, hour, minute,
48+
(fractional) second, and UTC offset. Other fields, like weekday (%a or %A),
49+
while parsed for syntactic validity, are ignored in the conversion.
50+
51+
Date and time fields that are out-of-range will be treated as errors rather than
52+
normalizing them like `absl::CivilSecond` does. For example, it is an error to
53+
parse the date "Oct 32, 2013" because 32 is out of range.
54+
55+
A leap second of ":60" is normalized to ":00" of the following minute with
56+
fractional seconds discarded. The following table shows how the given seconds
57+
and subseconds will be parsed:
58+
59+
"59.x" -> 59.x // exact
60+
"60.x" -> 00.0 // normalized
61+
"00.x" -> 00.x // exact
62+
63+
time_string: the input time string to be parsed.
64+
time_format: the time format.
65+
time_int64: the number of seconds / milliseconds / microseconds / nanoseconds
66+
elapsed since January 1, 1970 UTC.
67+
output_unit: the output unit of the parsed unix time. Can only be SECOND,
68+
MILLISECOND, MICROSECOND, NANOSECOND.
69+
)doc");
70+
} // end namespace addons
71+
} // end namespace tensorflow

tensorflow_addons/text/BUILD

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ py_library(
77
srcs = ([
88
"__init__.py",
99
"crf.py",
10+
"parse_time_op.py",
1011
"skip_gram_ops.py",
1112
]),
1213
data = [
14+
"//tensorflow_addons/custom_ops/text:_parse_time_op.so",
1315
"//tensorflow_addons/custom_ops/text:_skip_gram_ops.so",
1416
"//tensorflow_addons/utils",
1517
],
@@ -41,3 +43,16 @@ py_test(
4143
":text",
4244
],
4345
)
46+
47+
# Python test target for the parse_time op; runs parse_time_op_test.py against
# the ":text" library target.
py_test(
48+
name = "parse_time_op_test",
49+
size = "small",
50+
srcs = [
51+
"parse_time_op_test.py",
52+
],
53+
main = "parse_time_op_test.py",
54+
srcs_version = "PY2AND3",
55+
deps = [
56+
":text",
57+
],
58+
)

tensorflow_addons/text/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,7 @@
3232

3333
# Skip Gram Sampling
3434
from tensorflow_addons.text.skip_gram_ops import skip_gram_sample
35-
from tensorflow_addons.text.skip_gram_ops import skip_gram_sample_with_text_vocab
35+
from tensorflow_addons.text.skip_gram_ops import skip_gram_sample_with_text_vocab
36+
37+
# Parse Time
38+
from tensorflow_addons.text.parse_time_op import parse_time
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
"""Parse time ops."""
16+
from __future__ import absolute_import
17+
from __future__ import division
18+
from __future__ import print_function
19+
20+
import tensorflow as tf
21+
22+
from tensorflow_addons.utils.resource_loader import get_path_to_datafile
23+
24+
# Load the compiled custom-op shared library at import time; the path is
# resolved through get_path_to_datafile.
_parse_time_op = tf.load_op_library(
25+
get_path_to_datafile("custom_ops/text/_parse_time_op.so"))
26+
27+
# Declare that "Addons>ParseTime" has no gradient.
tf.no_gradient("Addons>ParseTime")
28+
29+
30+
def parse_time(time_string, time_format, output_unit):
    """Parse an input string according to the provided format string into a
    Unix time.

    Parse an input string according to the provided format string into a Unix
    time, the number of seconds / milliseconds / microseconds / nanoseconds
    elapsed since January 1, 1970 UTC.

    Uses strftime()-like formatting options, with the same extensions as
    FormatTime(), but with the exceptions that %E#S is interpreted as %E*S, and
    %E#f as %E*f. %Ez and %E*z also accept the same inputs.

    %Y consumes as many numeric characters as it can, so the matching
    data should always be terminated with a non-numeric. %E4Y always
    consumes exactly four characters, including any sign.

    Unspecified fields are taken from the default date and time of ...

      "1970-01-01 00:00:00.0 +0000"

    For example, parsing a string of "15:45" (%H:%M) will return a
    Unix time that represents "1970-01-01 15:45:00.0 +0000".

    Note that ParseTime only heeds the fields year, month, day, hour,
    minute, (fractional) second, and UTC offset. Other fields, like
    weekday (%a or %A), while parsed for syntactic validity, are
    ignored in the conversion.

    Date and time fields that are out-of-range will be treated as
    errors rather than normalizing them like `absl::CivilSecond` does.
    For example, it is an error to parse the date "Oct 32, 2013"
    because 32 is out of range.

    A leap second of ":60" is normalized to ":00" of the following
    minute with fractional seconds discarded. The following table
    shows how the given seconds and subseconds will be parsed:

      "59.x" -> 59.x  // exact
      "60.x" -> 00.0  // normalized
      "00.x" -> 00.x  // exact

    Args:
      time_string: The input time string(s) to be parsed; a string tensor
        (or a value convertible to one).
      time_format: The time format.
      output_unit: The output unit of the parsed unix time. Can only be SECOND,
        MILLISECOND, MICROSECOND, NANOSECOND.

    Returns:
      An int64 tensor with the same shape as `time_string` holding the number
      of seconds / milliseconds / microseconds / nanoseconds elapsed since
      January 1, 1970 UTC.

    Raises:
      ValueError: If `output_unit` is not a valid value. NOTE: depending on
        how the op is executed this may surface as
        `tf.errors.InvalidArgumentError` instead.
      tf.errors.InvalidArgumentError: If parsing `time_string` according to
        `time_format` failed.
    """
    return _parse_time_op.addons_parse_time(time_string, time_format,
                                            output_unit)

0 commit comments

Comments
 (0)