GloVe is a useful tool for rapidly generating word embeddings. I am using this with DNA sequences now to experiment with machine learning techniques in genomics.
Loading these embeddings into TensorFlow is essential for my experiments. Here is how to do it in Python.
# Width the TensorFlow embedding variable is declared with further below.
embedding_dim = 256

# NOTE(review): this chunk assumes `numpy` is imported as `np` and
# `tensorflow` as `tf` earlier in the file -- confirm.

# Read the vocabulary: one entry per line; the token is the first
# whitespace-separated field (GloVe's vocab.txt also carries a count).
with open("Glove/vocab.txt", 'r') as f:
    words = [line.rstrip().split(' ')[0] for line in f]

# Read the trained vectors: each line is "<word> <v1> <v2> ...".
# Iterate the file directly instead of readlines() -- no need to hold
# the raw text in memory.
vectors = {}
with open("Glove/vectors.txt", 'r') as f:
    for line in f:
        vals = line.rstrip().split(' ')
        vectors[vals[0]] = [float(x) for x in vals[1:]]

vocab_size = len(words)
# word -> row index, and the inverse index -> word mapping.
vocab = {w: idx for idx, w in enumerate(words)}
ivocab = {idx: w for idx, w in enumerate(words)}
# Infer the actual embedding width from the first word's vector rather
# than trusting a hard-coded constant.
vector_dim = len(vectors[ivocab[0]])
# Assemble the embedding matrix: row vocab[word] holds that word's vector.
W = np.zeros((vocab_size, vector_dim))
for word, v in vectors.items():
    if word == '':
        continue  # skip an empty token if the vectors file produced one
    W[vocab[word], :] = v

# Normalize each row to unit L2 norm (the original comment said "unit
# variance", but this divides by the Euclidean norm of each row).
d = np.sqrt(np.sum(W ** 2, axis=1))
# Guard against division by zero: rows never filled above (e.g. vocab
# words missing from vectors.txt) would otherwise become NaN.
d[d == 0] = 1.0
W_norm = (W.T / d).T
# Copy W_norm into a frozen TF1 embedding variable.
# Bug fix: the variable/placeholder shapes must match W_norm's actual
# width (vector_dim, inferred from the data), not the hard-coded
# embedding_dim -- the assign fails whenever GloVe was trained with a
# dimensionality other than 256.
Weights = tf.Variable(
    tf.constant(0.0, shape=[vocab_size, vector_dim]),
    trainable=False,
    name="Weights",
)
embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, vector_dim])
embedding_init = Weights.assign(embedding_placeholder)

# TF1-style session; left open on purpose so later code can keep using it.
sess = tf.Session()
sess.run(embedding_init, feed_dict={embedding_placeholder: W_norm})