extract name from pinyin list
This commit is contained in:
@@ -0,0 +1,30 @@
|
||||
from pypinyin import pinyin, lazy_pinyin, Style
|
||||
|
||||
|
||||
def read_name_from_files_by_line(
    input_path='/Users/lpan/Downloads/Chinese_Names_Corpus.txt',
    output_path='pinyin_list.txt',
):
    """Convert a file of names (one per line) to pinyin and save the result.

    Each non-blank line of ``input_path`` is stripped, converted with
    ``convert_name_to_pinyin`` and written as one line of ``output_path``.
    Progress is echoed to stdout: the numbered input line, then the
    converted value.

    Args:
        input_path: Text file to read, one name per line. Defaults to the
            corpus location that was previously hard-coded.
        output_path: File to (over)write with the converted names.

    Raises:
        OSError: If either file cannot be opened.
    """
    # "utf-8-sig" decodes UTF-8 and drops a leading byte-order mark, so the
    # first name is not polluted by '\ufeff'. An explicit encoding also keeps
    # the script independent of the platform's default locale encoding.
    with open(input_path, 'r', encoding='utf-8-sig') as names_file:
        with open(output_path, 'w', encoding='utf-8') as pinyin_file:
            # Iterate the file lazily instead of loading every line at once;
            # the context managers close both files even if conversion fails.
            for count, line in enumerate(names_file, start=1):
                name = line.strip()
                print("Line{}: {}".format(count, name))
                if not name:
                    # Nothing to convert on a blank line.
                    continue
                name_to_save = convert_name_to_pinyin(name)
                pinyin_file.write(name_to_save + "\n")
                print(name_to_save)
|
||||
|
||||
|
||||
def convert_name_to_pinyin(name: str) -> str:
    """Return ``name`` as pinyin with a space after the first item only.

    ``lazy_pinyin`` splits ``name`` into a list of items. The first item is
    kept on its own and every following item is concatenated without
    separators, e.g. the items ``['zhang', 'san', 'feng']`` become
    ``'zhang sanfeng'``. A name that yields a single item keeps the
    separator, giving ``'zhang '``.

    Args:
        name: The name to convert.

    Returns:
        The formatted pinyin string, or ``""`` when no items remain after
        filtering (for example for an empty ``name``).
    """
    # A byte-order mark read from the start of a file shows up as its own
    # item; drop it so it does not become the "first" part of the name.
    syllables = [item for item in lazy_pinyin(name) if item != '\ufeff']
    if not syllables:
        # Previously this case raised IndexError on syllables[0].
        return ""
    return syllables[0] + " " + "".join(syllables[1:])
|
||||
|
||||
|
||||
# Script entry point: this call is at module level, so the conversion runs
# whenever the file is executed (and also if it is imported).
read_name_from_files_by_line()
|
||||
Reference in New Issue
Block a user