extract name from pinyin list

This commit is contained in:
2023-04-20 00:00:43 +02:00
parent 83b6c91f80
commit 42e634f34d
3 changed files with 140 additions and 0 deletions
+30
View File
@@ -0,0 +1,30 @@
from pypinyin import pinyin, lazy_pinyin, Style
def read_name_from_files_by_line(
    input_path='/Users/lpan/Downloads/Chinese_Names_Corpus.txt',
    output_path='pinyin_list.txt',
):
    """Convert every name in a corpus file to pinyin and save the results.

    Reads ``input_path`` one line at a time, converts each non-blank line
    with ``convert_name_to_pinyin`` and writes the result, followed by a
    newline, to ``output_path`` (which is truncated first). Progress is
    echoed to stdout: the numbered input line, then the converted name.

    Args:
        input_path: Text file with one name per line. Defaults to the
            location the script originally hard-coded.
        output_path: Destination file for the converted names. Defaults to
            ``pinyin_list.txt`` in the current working directory.

    Raises:
        OSError: If either file cannot be opened.
    """
    # 'utf-8-sig' drops a leading byte-order mark if the corpus has one, so
    # the first name is not polluted by '\ufeff'; it reads BOM-less UTF-8
    # just the same. Explicit encodings keep the result independent of the
    # platform's default locale.
    with open(input_path, 'r', encoding='utf-8-sig') as source, \
            open(output_path, 'w', encoding='utf-8') as target:
        # Iterate the file object directly so the corpus is streamed rather
        # than loaded into memory in one go.
        for count, line in enumerate(source, start=1):
            name = line.strip()
            print("Line{}: {}".format(count, name))
            if not name:
                # A blank line carries no name to convert; skip it instead
                # of letting the conversion fail on empty input.
                continue
            name_to_save = convert_name_to_pinyin(name)
            target.write(name_to_save + "\n")
            print(name_to_save)
def convert_name_to_pinyin(name: str) -> str:
    """Render a name as ``"<first syllable> <remaining syllables joined>"``.

    The name is converted with ``lazy_pinyin``; the first resulting item is
    kept on its own (presumably the family name -- confirm against the
    corpus format) and all remaining items are concatenated without a
    separator. For example, a name whose conversion yields
    ``['wang', 'xiao', 'ming']`` becomes ``"wang xiaoming"``.

    Args:
        name: The name to convert. Byte-order-mark characters (``'\\ufeff'``)
            anywhere in the string are ignored.

    Returns:
        The formatted pinyin string. A name that yields a single item is
        returned with a trailing space (``"wang "``), as before. An empty
        or BOM-only ``name`` returns ``""`` instead of raising.
    """
    # Remove the BOM from the input itself: filtering the converted list
    # only catches a BOM that ends up as a standalone item.
    cleaned = name.replace('\ufeff', '')
    if not cleaned:
        return ""
    syllables = lazy_pinyin(cleaned)
    if not syllables:
        return ""
    first, *rest = syllables
    return first + " " + "".join(rest)
# Script entry point: the conversion runs as soon as this module is executed.
# NOTE(review): there is no ``if __name__ == "__main__":`` guard, so merely
# importing this module also triggers the file read/write.
read_name_from_files_by_line()