diff --git a/ItemRelate.py b/ItemRelate.py
deleted file mode 100644
index b3791c9..0000000
--- a/ItemRelate.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import os
-os.environ["OPENAI_API_KEY"]= "sk-PRJ811XeKzEy20Ug3dA98a34Af8b40B5816dE15503D33599"
-os.environ["OPENAI_BASE_URL"]= "http://154.9.28.247:3000/v1/"
-
-from openai import OpenAI
-client = OpenAI()
-
-from sklearn.metrics.pairwise import cosine_similarity
-
-def main(scale):
-
-    item=[]
-    vec=[]
-
-    for i in scale:
-        item.append(i)
-        vec.append(client.embeddings.create(
-            input=scale[i], model="text-embedding-3-small" # nomic-embed-text text-embedding-3-small
-        ).data[0].embedding)
-
-    simi=cosine_similarity(vec)
-
-    que=[]
-
-    for i,v in enumerate(simi):
-        for j in range(0,i):
-            que.append({"from":item[j], "to":item[i], "similarity":simi[i][j]})
-
-    return sorted(que, key = lambda t : t["similarity"], reverse=True)
\ No newline at end of file
diff --git a/file_load.py b/file_load.py
index 3c0814f..4d34680 100644
--- a/file_load.py
+++ b/file_load.py
@@ -1,9 +1,40 @@
-from ItemRelate import main as relate
-
 import json
 import os
 import random
 
+from openai import OpenAI
+from sklearn.metrics.pairwise import cosine_similarity
+
+# NOTE(review): this API key is committed in plain text; rotate it and load
+# it from the environment or a secrets store instead of hard-coding it here.
+os.environ["OPENAI_API_KEY"]= "sk-PRJ811XeKzEy20Ug3dA98a34Af8b40B5816dE15503D33599"
+os.environ["OPENAI_BASE_URL"]= "http://154.9.28.247:3000/v1/"
+
+# Build the client only after the environment is populated: OpenAI() reads
+# OPENAI_API_KEY / OPENAI_BASE_URL at construction time.
+client = OpenAI()
+
+
+def embedding(scale):
+    """Embed every value of ``scale`` and rank all key pairs by similarity.
+
+    Returns a list of {"from", "to", "similarity"} dicts, one per pair of
+    keys, sorted by descending cosine similarity of their embeddings.
+    """
+    item=[]
+    vec=[]
+    for i in scale:
+        item.append(i)
+        vec.append(client.embeddings.create(
+            input=scale[i], model="text-embedding-3-small" # nomic-embed-text text-embedding-3-small
+        ).data[0].embedding)
+    simi=cosine_similarity(vec)
+    que=[]
+    for i,v in enumerate(simi):
+        for j in range(0,i):
+            que.append({"from":item[j], "to":item[i], "similarity":simi[i][j]})
+    return sorted(que, key = lambda t : t["similarity"], reverse=True)
+
 def batch():
     scales = os.listdir("Scales")
     items={}
@@ -25,31 +56,32 @@ def old_type(str):
     with open(str,"w") as file:
         file.write(json.dumps(new))
 
-def data():
-    s=""
-
-    item = batch()
-    for i in item:
-        s+=i+','
-    s=s[:-1]+'\n'
-
-    for i in range(0,1000):
-        s += str(random.randint(0,4))
-        for j in range(1,20):
-            s += ',' + str(random.randint(0,4))
-        s+='\n'
-
-    with open("Temp/data.csv","w") as data:
-        data.write(s)
-
-
-def main(force:bool = False):
+def similarity(force:bool = False):
+    """Return the ranked item pairs, cached in Temp/items.json.
+
+    The pairs are recomputed with embedding() when ``force`` is true or the
+    cache file is empty; otherwise they are loaded from the cache.
+    """
     if force or os.path.getsize("Temp/items.json") == 0:
-        que=relate(batch())
+        que=embedding(batch())
         with open("Temp/items.json","w") as items:
             items.write(json.dumps(que))
     else:
         with open("Temp/items.json","r") as items:
             que = json.load(items)
-    data()
     return que
+
+def data():
+    """Write Temp/data.csv: a header from batch(), then 1000 rows of 20 random 0-4 values."""
+    s=""
+    item = batch()
+    for i in item:
+        s+=i+','
+    s=s[:-1]+'\n'
+    for i in range(0,1000):
+        s += str(random.randint(0,4))
+        for j in range(1,20):
+            s += ',' + str(random.randint(0,4))
+        s+='\n'
+    with open("Temp/data.csv","w") as data:
+        data.write(s)
diff --git a/main.py b/main.py
index 34ce745..a3f70ab 100644
--- a/main.py
+++ b/main.py
@@ -1,10 +1,11 @@
-from file_load import main as init
+import file_load
 import json
 import os
 
-que = init()
+similarity = file_load.similarity()
+file_load.data()
 
-for i in que:
+for i in similarity:
     print(i)
 
 