How To Create A Frequency Tensor Out Of Two Tensors In TensorFlow
I have a tensor like this in which the values are the frequency and the rows are the index(0 to 6): tf_docs = [[0, 2], [1, 2], [2, 1], [5, 0], [0, 1], [7, 8], [9, 6]] I hav
Solution 1:
Let `nonzero_tf_docs` be defined as:
# 1 where an entry of tf_docs equals zero, 0 elsewhere (as int32).
zero_tf_docs = tf.cast(tf.equal(tf_docs, tf.zeros_like(tf_docs)), tf.int32)
# Per row: 1 if the row contains no zero entry, 0 if it contains at least one.
nonzero_tf_docs = 1 - tf.reduce_max(zero_tf_docs, axis=-1)
The OP is asking to compute the sum `nonzero_tf_docs[i] + nonzero_tf_docs[j]` for each pair of indices `(i, j)` in `tf_topics` and to display the result in a matrix. This can be achieved as follows:
def compute_result(tf_topics_, nonzero_tf_docs, tf_docs):
    """Build a symmetric matrix of pair sums from one orientation of the pairs.

    For every index pair ``(i, j)`` in ``tf_topics_`` the value
    ``nonzero_tf_docs[i] + nonzero_tf_docs[j]`` is written at cell ``(i, j)``.
    Only the cells that land in the lower triangle are kept; they are then
    mirrored into the upper triangle. Lower-triangle cells that no pair
    refers to hold ``-1``. The diagonal is overwritten with the row sums of
    ``tf_docs``.

    Args:
        tf_topics_: index pairs, one pair per row. Assumed to be an int64
            tensor of shape ``[num_pairs, 2]`` (it is used directly as
            ``SparseTensor`` indices) -- confirm against the caller.
        nonzero_tf_docs: per-document 0/1 flag, indexed by document id.
        tf_docs: per-document frequency rows; summed along the last axis to
            fill the diagonal.

    Returns:
        An int32 ``[max_index, max_index]`` tensor, where ``max_index`` is the
        largest index in ``tf_topics_`` plus one.
    """
    # Find matrix lower part
    values = tf.reduce_sum(tf.gather(nonzero_tf_docs, tf_topics_), axis=-1)
    # Derive the size from the argument rather than from the module-level
    # `tf_topics`, so the function does not depend on a global being defined.
    max_index = tf.reduce_max(tf_topics_) + 1
    out_sparse = tf.sparse.SparseTensor(
        indices=tf_topics_,
        values=values,
        dense_shape=[max_index, max_index],
    )
    out_sparse = tf.cast(out_sparse, dtype=tf.int32)
    # Reorder the indices into canonical row-major order before densifying.
    out_sparse = tf.sparse.reorder(out_sparse)
    # Cells that no pair refers to are marked with -1.
    out_dense = tf.sparse.to_dense(out_sparse, default_value=-1)
    # Keep the lower triangle (including the diagonal); the rest becomes 0.
    out_lower = tf.matrix_band_part(out_dense, -1, 0)

    # Compute diagonal
    diag_values = tf.reduce_sum(tf_docs, axis=-1)
    diag = tf.slice(diag_values, begin=[0], size=[max_index])

    # Construct output matrix
    out = out_lower + tf.transpose(out_lower)
    mask = tf.eye(max_index, dtype=tf.int32)
    # Replace the (doubled) diagonal of `out` with the document totals.
    out = (1 - mask) * out + mask * diag
    return out
# Find docs without zeros
zero_tf_docs = tf.cast(tf.equal(tf_docs, tf.zeros_like(tf_docs)), tf.int32)
nonzero_tf_docs = 1 - tf.reduce_max(zero_tf_docs, axis=-1)

# Transform counts into matrix format
# SparseTensor indices must be int64, hence the cast.
tf_topics = tf.cast(tf_topics, dtype=tf.int64)
# Swap the two columns so that each pair (i, j) is also seen as (j, i).
tf_topics_reversed = tf.reverse(tf_topics, [-1])
tf_topics_ = tf_topics_reversed
out_1 = compute_result(tf_topics_, nonzero_tf_docs, tf_docs)
out_2 = compute_result(tf_topics, nonzero_tf_docs, tf_docs)
# A cell missing from one orientation holds -1 there, so the element-wise
# maximum keeps the real value whenever either orientation provides one.
out = tf.maximum(out_1, out_2)

with tf.Session() as sess:
    r = sess.run(out)
    print(r)
    # prints [[ 2  1  1  0 -1]
    #         [ 1  3  2  1  1]
    #         [ 1  2  3  1  1]
    #         [ 0  1  1  5  0]
    #         [-1  1  1  0  1]]
Solution 2:
Thanks to your latest edits and to rvinas' answer I think I finally understood what you need. One of the things that was confusing me was the fact that there are "null" cells in the output matrix. Anyway, here is a way to do that:
import tensorflow as tf
def freq_matrix(tf_docs, tf_topics):
    """Build a symmetric co-occurrence matrix for the given index pairs.

    For each pair ``(i, j)`` in ``tf_topics`` the cell ``(i, j)`` (and its
    mirror ``(j, i)``) holds the number of columns in which both
    ``tf_docs[i]`` and ``tf_docs[j]`` are non-zero. The diagonal holds the row
    sums of ``tf_docs``. Off-diagonal cells that no pair refers to are ``-1``.

    Args:
        tf_docs: frequency rows, one per document; anything accepted by
            ``tf.convert_to_tensor``.
        tf_topics: index pairs, one pair per row; anything accepted by
            ``tf.convert_to_tensor``. Pairs may appear in either order and
            may be repeated.

    Returns:
        An ``[m, m]`` tensor with the dtype of ``tf_docs``, where ``m`` is the
        largest index in ``tf_topics`` plus one.
    """
    tf_docs = tf.convert_to_tensor(tf_docs)
    tf_topics = tf.convert_to_tensor(tf_topics)
    # Sort indices to make upper diagonal
    tf_topics = tf.sort(tf_topics, axis=1)
    # Largest index
    m = tf.reduce_max(tf_topics) + 1
    # Remove duplicates: encode each pair as a single integer, deduplicate,
    # then decode back into two columns.
    topics_flat = tf_topics[:, 0] * m + tf_topics[:, 1]
    topics_uniq, _ = tf.unique(topics_flat)
    tf_topics = tf.stack([topics_uniq // m, topics_uniq % m], axis=1)
    # Make diagonal
    diag = tf.reduce_sum(tf_docs[:m], axis=1)
    # Find non-zero positions in docs
    docs_nz = tf.not_equal(tf_docs, 0)
    # Get for each pair
    docs_g = tf.gather(docs_nz, tf_topics)
    # Find number of matches
    matches = tf.math.logical_and(docs_g[:, 0], docs_g[:, 1])
    freq = tf.reduce_sum(tf.dtypes.cast(matches, tf_docs.dtype), axis=1)
    # Add one to all values to subtract one at the end; cells that are never
    # written stay at 0 and therefore end up as -1.
    diag += 1
    freq += 1
    # Make upper diagonal
    out = tf.scatter_nd(tf_topics, freq, [m, m])
    # Make symmetric
    out += tf.transpose(out)
    # Add diagonal
    out += tf.linalg.diag(diag)
    # Subtract one to mark empty cells
    out -= 1
    return out
# Test
tf_docs = tf.constant([[0, 2], [1, 2], [2, 1], [5, 0], [0, 1], [7, 8], [9, 6]])
tf_topics = tf.constant([[1, 2], [1, 3], [1, 0], [2, 3], [2, 0], [3, 0],
[3, 4], [3, 2], [3, 1], [4, 2], [4, 1], [2, 1]])
print(freq_matrix(tf_docs, tf_topics).numpy())
# Expected output (-1 marks index pairs that do not appear in tf_topics):
# [[ 2  1  1  0 -1]
#  [ 1  3  2  1  1]
#  [ 1  2  3  1  1]
#  [ 0  1  1  5  0]
#  [-1  1  1  0  1]]
Post a Comment for "How To Create A Frequency Tensor Out Of Two Tensor In Tensorflow"