pandas 的 apply 函数并没有 expand 这个参数,因此需要把调用 apply 时传入的 expand 参数删除。修改后的代码如下:
import pandas as pd
import jieba.posseg as pseg
# Load the review data from the Excel workbook. Later code in this file
# reads the '评价内容' and '情感倾向' columns from this sheet.
data = pd.read_excel('华为.xlsx')
# Extract the PROP (attribute) and ADJ (descriptive) words of one review.
def extract_opinion(text, cut=None):
    """Split *text* into attribute words (PROP) and adjectives (ADJ).

    The text is segmented and POS-tagged, then every token is sorted by
    its tag:

    * PROP - tokens whose tag starts with ``n`` (jieba's noun family:
      ``n``, ``nr``, ``ns``, ``nz`` ...) or is the literal ``prop``.
    * ADJ  - tokens whose tag starts with ``a`` (jieba's adjective
      family: ``a``, ``ad``, ``an`` ...; this also covers ``adj``).

    NOTE(review): treating noun-tagged words as PROP is an approximation
    of "attribute word"; confirm it matches the intended meaning.

    Args:
        text: The review text. Non-string values (for example the NaN
            pandas produces for an empty Excel cell) and empty strings
            yield two empty lists.
        cut: Optional callable taking the text and returning an iterable
            of ``(word, flag)`` pairs. Defaults to ``pseg.cut``.

    Returns:
        A ``(prop_list, adj_list)`` tuple of word lists, each in the
        order the words appear in the text.
    """
    # Empty cells come back from read_excel as NaN (a float); there is
    # nothing to segment, and the tagger would fail on a non-string.
    if not isinstance(text, str) or not text:
        return [], []

    if cut is None:
        cut = pseg.cut

    prop_list = []
    adj_list = []
    for word, flag in cut(text):
        # jieba never emits 'prop' / 'adj'; it uses ICTCLAS-style tags, so
        # match on the tag family. 'prop' is kept for taggers that use it
        # (a bare 'p' is jieba's preposition tag and must not match).
        if flag.startswith(('n', 'prop')):
            prop_list.append(word)
        elif flag.startswith('a'):
            adj_list.append(word)
    return prop_list, adj_list
# Run opinion extraction once per review and store the two word lists in
# their own columns. Mapping to plain tuples avoids constructing a
# pd.Series object for every row and also works when the sheet is empty.
opinions = data['评价内容'].map(extract_opinion)
data['PROP'] = opinions.map(lambda pair: pair[0])
data['ADJ'] = opinions.map(lambda pair: pair[1])

# Count the reviews overall and per sentiment label.
total_count = len(data)
sentiment = data['情感倾向']
positive_count = int((sentiment == 'Positive').sum())
neutral_count = int((sentiment == 'Neutral').sum())
negative_count = int((sentiment == 'Negative').sum())

# Share of each sentiment label. An empty sheet would otherwise raise
# ZeroDivisionError, so report 0.0 for every share in that case.
if total_count:
    positive_ratio = positive_count / total_count
    neutral_ratio = neutral_count / total_count
    negative_ratio = negative_count / total_count
else:
    positive_ratio = neutral_ratio = negative_ratio = 0.0

# Show a sample of the extracted PROP and ADJ words.
print(data[['评价内容', 'PROP', 'ADJ']].head(10))

# Print the total count and the share of each sentiment.
print('总条数:', total_count)
print('正向占比:', positive_ratio)
print('中立占比:', neutral_ratio)
print('负向占比:', negative_ratio)

# Save the summary statistics as a one-row sheet.
# NOTE(review): only the summary is written; the per-review PROP/ADJ
# columns are not saved - confirm that this is intended.
result = pd.DataFrame({'总条数': [total_count], '正向占比': [positive_ratio], '中立占比': [neutral_ratio], '负向占比': [negative_ratio]})
result.to_excel('10gxx_opinion.xlsx', index=False)