1
私は2つのファイルを持っています。numpy.load()間違った魔法の文字列エラー
File "Cluster.py", line 7, in <module>
matrix = np.load(matrix_file)
File "C:\Users\jarek\Anaconda2\lib\site-packages\numpy\lib\npyio.py", line 406, in load
pickle_kwargs=pickle_kwargs)
File "C:\Users\jarek\Anaconda2\lib\site-packages\numpy\lib\format.py", line 620, in read_array
version = read_magic(fp)
File "C:\Users\jarek\Anaconda2\lib\site-packages\numpy\lib\format.py", line 216, in read_magic
raise ValueError(msg % (MAGIC_PREFIX, magic_str[:-2]))
ValueError: the magic string is not correct; expected '\x93NUMPY', got '\x00\x00I\x1c\x00\x00'
:一つは、しかし、私は次のエラーを取得する...
import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage
import json
import pdb
with open('matrix/matrix.npy', 'r') as matrix_file:
matrix = np.load(matrix_file)
hcluster = linkage(matrix, "complete")
を次のファイルが同じファイルをロードすることになっている圧縮スパース行形式
from sklearn.feature_extraction.text import TfidfTransformer
import pdb
def stem_document(document):
translatedict = ""
stemmer = PorterStemmer()
for word in string.punctuation:
translatedict = translatedict + word
doc_stemmed = []
for word in document.split():
lowerstrippedword = ''.join(c for c in word.lower() if c not in translatedict)
try:
stemmed_word = stemmer.stem(lowerstrippedword)
doc_stemmed.append(stemmed_word)
except:
print lowerstrippedword + " could not be stemmed."
return ' '.join(doc_stemmed)
def readFileandStem(filestring):
with open(filestring, 'r') as file:
reader = csv.reader(file)
file_extras = []
vector_data = []
error = False
while (error == False):
try:
next = reader.next()
if len(next) == 3 and next[2] != "":
document = next[2]
stemmed_document = stem_document(document)
vector_data.append(stemmed_document)
file_extra = []
file_extra.append(next[0])
file_extra.append(next[1])
file_extras.append(file_extra)
except:
error = True
return [vector_data, file_extras]
filestring = 'Data.csv'
print "Reading File"
data = readFileandStem(filestring)
documents = data[0]
file_extras = data[1]
print "Vectorizing Data"
vectorizer = CountVectorizer()
matrix = vectorizer.fit_transform(documents)
tf_idf_transform = TfidfTransformer(use_idf=False).fit(matrix)
tf_idf_matrix = tf_idf_transform.transform(matrix)
with open('matrix/matrix.npy', 'w') as matrix_file:
np.save(matrix_file, tf_idf_matrix)
file_json_map = {}
file_json_map['extras'] = file_extras
with open('matrix/extras.json', 'w') as extras_file:
extras_file.write(json.dumps(file_json_map))
print "finished"
でnumpy
配列を作成します
私が調べたことから、すべての.npyファイルが同じ魔法の文字列 "\ x93NUMPY"を持っているため、魔法の文字列が正しくない理由はわかりません。
アイデア?
は 'with open(blahblah)as matrix_file'ではありません。単に 'np.load(blahblah)'を試してください – Jeon
その解決策はありません。試してみました: "matrix = np.load( 'matrix/matrix.npy')" –