目录
前言
size 参数决定训练出的词向量的维度。size 应该如何选择?这里做了个实验。
#!/usr/bin/env python
# coding=utf-8
# Created on 2020/3/20
# Creator HongYuan Guo
from gensim.models import word2vec
from gensim.models import Doc2Vec
import time
class WordVector:
def __init__(self):
self.corpus_path = 'Corpus\corpus_**.txt'
def Train_model(self, path, Size):
    """Train a Word2Vec model on the corpus at ``path`` and save it to disk.

    Args:
        path: Corpus file handed to ``word2vec.Text8Corpus``.
        Size: Passed to ``Word2Vec`` as ``size`` (the word-vector
            dimensionality); also embedded in the saved model's file name.

    Returns:
        The model object returned by ``word2vec.Word2Vec``.
    """
    sentences = word2vec.Text8Corpus(path)  # load the corpus
    # ``window`` is not passed, so the library default applies (5 according
    # to the original author's note).
    # NOTE(review): gensim >= 4.0 renamed ``size`` to ``vector_size``; this
    # call targets the gensim 3.x API -- confirm the installed version.
    model = word2vec.Word2Vec(sentences, size=Size)
    # Raw string: keeps the literal backslash separator while avoiding the
    # invalid "\W" escape sequence; the resulting path is unchanged.
    model.save(r'static_models\WordVector_JD60W_' + str(Size) + '.model')
    return model
def get_model(self,path,size):
try:
model = Doc2Vec.load('static_models\WordVector_JD60W_'+str(size)+'.model')
print('模型已存在')
except:
print('正在训练模型')
self.Train_model(path,size)
print('训练结束')
finally:
model = Doc2