"""Helpers for comparing scale items by text similarity and by response correlation.

Scale definitions are JSON files in the ``Scales`` directory.  Each file is read
as an object with an ``"item"`` mapping (item name -> item text) and, optionally,
a ``"reverse"`` list of item names.
"""
import json
import os
import random

import numpy
import pandas
# import matplotlib
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity

# NOTE(security): a credential is hard-coded here and therefore lives in version
# control.  It is kept unchanged so the module behaves exactly as before, but it
# should be rotated and supplied through the real environment instead.
os.environ["OPENAI_API_KEY"] = "sk-PRJ811XeKzEy20Ug3dA98a34Af8b40B5816dE15503D33599"
os.environ["OPENAI_BASE_URL"] = "http://154.9.28.247:3000/v1/"
client = OpenAI()

_SCALES_DIR = "Scales"
_CACHE_PATH = "Temp/items.json"


def _load_scales():
    """Yield the parsed JSON content of every file in the ``Scales`` directory."""
    for filename in os.listdir(_SCALES_DIR):
        with open(os.path.join(_SCALES_DIR, filename), "r", encoding="utf-8") as handle:
            yield json.load(handle)


def batch():
    """Merge the ``"item"`` mappings of all scale files into one dict.

    Returns:
        dict: item name -> item text.  When two files define the same item
        name, the file read later wins.
    """
    items = {}
    for scale in _load_scales():
        items.update(scale["item"])
    return items


def reverse_list():
    """Collect the ``"reverse"`` entries of all scale files into one list.

    Files without a ``"reverse"`` key (for example those rewritten by
    ``old_type``) contribute nothing instead of raising ``KeyError``.

    Returns:
        list: the concatenated ``"reverse"`` lists.
    """
    items = []
    for scale in _load_scales():
        items.extend(scale.get("reverse", []))
    return items


def old_type(str):  # noqa: A002 - parameter name kept for backward compatibility
    """Rewrite a scale file from the list layout to the ``{"item": {...}}`` layout.

    The file is expected to contain a JSON list of objects that each have a
    ``"name"`` and a ``"label"`` key.  It is overwritten in place with
    ``{"item": {name: label, ...}}``.

    Args:
        str: path of the JSON file to convert (the name shadows the builtin
            but is kept so existing keyword callers keep working).
    """
    path = str
    with open(path, "r", encoding="utf-8") as file:
        scale = json.load(file)
    converted = {"item": {entry["name"]: entry["label"] for entry in scale}}
    with open(path, "w", encoding="utf-8") as file:
        file.write(json.dumps(converted))


def calc_similarity(scale):
    """Compute pairwise cosine similarity between the embeddings of all items.

    One embeddings request is sent per item through the module-level client.

    Args:
        scale: mapping of item name -> item text.

    Returns:
        list: one dict per unordered pair with keys ``"from"`` (the item that
        comes earlier in ``scale``), ``"to"`` (the later one) and
        ``"similarity"``.  Empty when ``scale`` has fewer than two items.
    """
    if not scale:
        # cosine_similarity rejects an empty input, and there are no pairs anyway.
        return []
    names = list(scale)
    vectors = [
        client.embeddings.create(
            input=scale[name],
            model="text-embedding-3-large",  # nomic-embed-text text-embedding-3-small
        ).data[0].embedding
        for name in names
    ]
    matrix = cosine_similarity(vectors)
    # Only the lower triangle is emitted, so each pair appears exactly once.
    return [
        {"from": names[j], "to": names[i], "similarity": float(matrix[i][j])}
        for i in range(len(names))
        for j in range(i)
    ]


def similarity(force: bool = False, sort: bool = True):
    """Return item-pair similarities, using ``Temp/items.json`` as a cache.

    Args:
        force: recompute and overwrite the cache even when it already has content.
        sort: when true, return the pairs ordered by descending similarity.

    Returns:
        list: pair dicts as produced by ``calc_similarity``.
    """
    cache_missing = not os.path.isfile(_CACHE_PATH) or os.path.getsize(_CACHE_PATH) == 0
    if force or cache_missing:
        que = calc_similarity(batch())
        # The cache directory may not exist on a first run.
        os.makedirs(os.path.dirname(_CACHE_PATH), exist_ok=True)
        with open(_CACHE_PATH, "w", encoding="utf-8") as cache:
            cache.write(json.dumps(que))
    else:
        with open(_CACHE_PATH, "r", encoding="utf-8") as cache:
            que = json.load(cache)
    if sort:
        return sorted(que, key=lambda pair: pair["similarity"], reverse=True)
    return que


def make_data():
    """Write ``data.csv`` with a header of item names and 1000 random rows.

    Every row holds one random integer in ``0..4`` per item, so the number of
    columns always matches the header.
    """
    # NOTE(review): this writes "data.csv" while corelation() reads
    # "Work/data.csv" - confirm which location is intended.
    names = list(batch())
    lines = [",".join(names)]
    for _ in range(1000):
        lines.append(",".join(str(random.randint(0, 4)) for _ in names))
    with open("data.csv", "w", encoding="utf-8") as data:
        data.write("\n".join(lines) + "\n")


def corelation():
    """Compute pairwise correlations between the columns of ``Work/data.csv``.

    The sign of a correlation is flipped once for each of the two columns that
    appears in ``reverse_list()``.  Pairs whose correlation cannot be computed
    (``TypeError`` / ``ValueError`` from ``Series.corr``) are skipped.

    Returns:
        dict: ``(column_a, column_b) -> correlation`` with both orderings of
        every pair present.
    """
    # NOTE(review): make_data() writes "data.csv", not "Work/data.csv" - confirm.
    data = pandas.read_csv("Work/data.csv")
    rev = reverse_list()
    print(rev)
    columns = list(data)
    que = {}
    for index, i in enumerate(columns):
        # Only columns before ``i`` are visited; the mirrored key is filled in below.
        for j in columns[:index]:
            try:
                value = data[i].corr(data[j])
            except (TypeError, ValueError):
                continue
            if i in rev:
                value *= -1
            if j in rev:
                value *= -1
            que[i, j] = value
            que[j, i] = value
    return que