main.py：

- 添加了读取整个文件夹内 scale json 的功能
- 可以将我们旧的 json 格式转换为新格式

ItemRelate.py：
- 修复错误的变量名
- 简化 Embedding 调用
- 转为读取 item list
- 修复了排序
2024-05-14 13:04:54 +08:00 · 2024-05-14 13:04:54 +08:00 · 82b958122b
commit 82b958122b
parent 8717b81bbc
4 changed files with 38 additions and 62 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 __pycache__/*
+Temp/
+Scales/
--- a/ItemRelate.py
+++ b/ItemRelate.py
@@ -7,24 +7,16 @@ client = OpenAI()

 from sklearn.metrics.pairwise import cosine_similarity

-import json
-
-def embedding(s:str):
-	if len(s)==0:
-		return
-	else:
-		return client.embeddings.create(
-    		input=s, model="nomic-embed-text" # nomic-embed-text text-embedding-3-small
-			).data[0].embedding
-
-def main(scale:json):
+def main(scale):

 	item=[]
 	vec=[]

-	for i in scale["item"]:
+	for i in scale:
 		item.append(i)
-		vec.append(embedding(scale["item"][i]))
+		vec.append(client.embeddings.create(
+    		input=scale[i], model="text-embedding-3-small" # nomic-embed-text text-embedding-3-small
+			).data[0].embedding)

 	simi=cosine_similarity(vec)

@@ -32,10 +24,6 @@ def main(scale:json):

 	for i,v in enumerate(simi):
 		for j in range(0,i):
-			# print(simi[i][j],',',item[i],',',item[j])
-			que.append({"from":item[j], "i":item[i], "similarity":simi[i][j]})
-		# print("\n")
+			que.append({"from":item[j], "to":item[i], "similarity":simi[i][j]})

-	sorted(que,key= lambda t : t["similarity"])
-
-	return que
+	return 	sorted(que, key = lambda t : t["similarity"], reverse=True)
--- a/TestFile/adhd.json
+++ b/TestFile/adhd.json
@@ -1,36 +0,0 @@
-{
-    "name":"成人多动症自筛表 (6 itmes)",
-    "introduction":1,
-    "item":{
-        "asrs1": "当一项工作最有挑战性的部分完成后，你经常难以做到对细节部分更精益求精吗?",
-        "asrs2": "你经常很难把有组织性的任务、事情布置得井井有条吗? ",
-        "asrs3": "当你从事一项需要脑力的工作的时候，你经常回避或者延长开始的时间吗？",
-        "asrs4": "你经常很难记住约会或者一些必须完成的事情吗?",
-        "asrs5": "当您必须安静地坐很长时间时，是否经常表现得坐立不安，手脚动作多?",
-        "asrs6": "你是否在自觉做事情时表现得极度活跃，非完成某项事情不可，好像在被发动机所驱使一样?"
-    },
-    "value": {
-        "从不":0,
-        "很少":1,
-        "有时":2,
-        "经常":3,
-        "总是":4
-    },
-    "result":[
-        {
-            "name": "ADHD",
-            "sum": [
-                "asrs1",
-                "asrs2",
-                "asrs3",
-                "asrs4",
-                "asrs5",
-                "asrs6"
-            ],
-            "re_sum": [],
-            "result": {
-                "0": 2
-            }
-        }
-    ]
-}
--- a/main.py
+++ b/main.py
@@ -2,14 +2,36 @@ from ItemRelate import main as relate
 import json
 import os

-if os.path.getsize("TestFile/items.json") == 0:
-	with open("TestFile/adhd.json","r") as json_file:
-		scale = json.load(json_file)
-	que=relate(scale)
-	with open("TestFile/items.json","w") as items:
+def batch():
+	scales = os.listdir("Scales")
+	items={}
+	for i in scales:
+		with open("Scales/"+i,"r") as scale:
+			tmp = json.load(scale)
+			for i in tmp["item"]:
+				items[i]=tmp["item"][i]
+	# print(items)
+	return items
+
+def old_type(str):
+	with open(str,"r") as file:
+		scale=json.load(file)
+	new={"item":{}}
+	for i in scale:
+		new["item"][i["name"]]=i["label"]
+		# print(i["name"],i["label"])
+	with open(str,"w") as file:
+		file.write(json.dumps(new))
+
+# old_type("Scales/ceE.json")
+
+if os.path.getsize("Temp/items.json") == 0:
+	que=relate(batch())
+	with open("Temp/items.json","w") as items:
 		items.write(json.dumps(que))
 else:
-	with open("TestFile/items.json","r") as items:
+	with open("Temp/items.json","r") as items:
 		que = json.load(items)

-print(que)
+for i in que:
+	print(i)