This commit is contained in:
tmikolov 2013-08-01 19:28:55 +00:00
parent 9f78f72254
commit 25b0cc66a9
1 changed files with 12 additions and 0 deletions

12
demo-phrase-accuracy.sh Normal file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# demo-phrase-accuracy.sh
#
# End-to-end demo, run from the directory that holds the Makefile:
#   1. build the tools with `make`
#   2. download the text8 corpus unless a file named `text8` already exists
#   3. run ./word2phrase on text8 to produce text8-phrase
#   4. run ./word2vec on text8-phrase to produce vectors-phrase.bin
#   5. feed questions-phrases.txt to ./compute-accuracy
#
# Requires: bash (for the `time` keyword), make, wget, gzip, and
# questions-phrases.txt in the current directory.

# Stop at the first failing step: without this a failed build or download
# would still start the long training runs on missing or stale inputs.
set -euo pipefail

make

if [ ! -e text8 ]; then
  # The archive is a .zip, but it is saved under a .gz name so that gzip can
  # unpack it in place as `text8` (GNU gzip accepts single-member zip files).
  wget http://mattmahoney.net/dc/text8.zip -O text8.gz
  # Options go before the operand so non-GNU gzip builds parse -f as well.
  gzip -d -f text8.gz
fi

# Separator line used above and below the notice.
readonly banner_rule='----------------------------------------------------------------------------------------------------------------'

printf '%s\n' \
  "${banner_rule}" \
  'Note that the accuracy and coverage of the test set questions is going to be low with this small training corpus' \
  'To achieve better accuracy, larger training set is needed' \
  "${banner_rule}"

# Both training steps are wrapped in `time` so their wall-clock cost is shown.
time ./word2phrase -train text8 -output text8-phrase -threshold 500 -debug 2 -min-count 3
time ./word2vec -train text8-phrase -output vectors-phrase.bin -cbow 0 -size 300 -window 10 -negative 0 -hs 1 -sample 1e-3 -threads 12 -binary 1 -min-count 3

# compute-accuracy reads the question set from standard input.
./compute-accuracy vectors-phrase.bin <questions-phrases.txt