以上是一份聊天记录,如你所见除了部分无关聊天外,充斥着关于物...
Author:匿名用户
2026/01/05 09:12
Description
编写Python代码从聊天记录中自动提取物流价格、时效和地点等结构化信息,支持中英文单位转换和数据验证。
Tags
代码编程、提取信息、内容生成
Content
```python
# 提取物流价格时效信息的Python代码
import re
from datetime import datetime
# Substrings that mark a sender as a customer-service agent.
_DEFAULT_AGENT_KEYWORDS = ("斯比特", "客服")

# One chat line: "<username>(<id>) <M/D> <H:M:S> <message>".
_LINE_RE = re.compile(
    r'^(.*?)\(.*?\)\s+(\d{1,2}/\d{1,2})\s+(\d{1,2}:\d{1,2}:\d{1,2})\s+(.*)$'
)
# Amount followed by a currency marker; both the halfwidth (U+00A5) and the
# fullwidth (U+FFE5) yen/yuan signs are accepted.
_PRICE_RE = re.compile(r'(\d+\.?\d*)\s*(元|RMB|\u00a5|\uffe5)')
# "3天", "3-5天", "7工作日", ...
_DELIVERY_RE = re.compile(r'(\d+[-~]?\d*)\s*(天|日|工作日)')
# Origin: text between "从" and the first shipping verb / "to" word.
# This must be an alternation group: a character class such as [发运|到|至]
# matches single characters (including a literal "|"), not whole words.
_FROM_RE = re.compile(r'从(.+?)(?:发运|发往|发货|运往|运到|发|到|至)')
# Destination: text after a "to" word up to end of message or punctuation
# (ASCII and fullwidth comma/semicolon, and the CJK full stop).
_TO_RE = re.compile(r'(?:发往|运往|到|至)(.*?)(?:$|[,;。\uff0c\uff1b])')
_WEIGHT_RE = re.compile(r'(\d+\.?\d*)\s*(kg|千克|公斤)', re.IGNORECASE)
_VOLUME_RE = re.compile(r'(\d+\.?\d*)\s*(m³|m3|立方米|立方|方)')
# Explicit day of month, e.g. "21号交货".
_DELIVERY_DAY_RE = re.compile(r'(\d{1,2})[号日]交')

# Relative-day words and their offsets in days. Order is significant: the
# first keyword found in the message wins, and "大后天" must be tested before
# its substring "后天".
_RELATIVE_DAY_OFFSETS = (
    ("今天", 0),
    ("当天", 0),
    ("明天", 1),
    ("大后天", 3),
    ("后天", 2),
)


def _first_match(pattern, text, group=0):
    """Return the stripped text of *group* for the first match, else None."""
    found = pattern.search(text)
    if found is None:
        return None
    # An empty capture (e.g. "到" at the very end of a message) is no data.
    return found.group(group).strip() or None


def _promised_days(message, date, clock, reference_year):
    """Return the number of days from the reply to the promised delivery.

    Returns None when the timestamp is not a valid date/time in
    *reference_year* or when the message names no delivery day.
    """
    try:
        month, day = (int(part) for part in date.split('/'))
        hour, minute, second = (int(part) for part in clock.split(':'))
        # Building the datetime validates both the date and the time of day.
        reply_day = datetime(
            reference_year, month, day, hour, minute, second
        ).date()
    except ValueError:
        return None

    for keyword, offset in _RELATIVE_DAY_OFFSETS:
        if keyword in message:
            return offset

    day_match = _DELIVERY_DAY_RE.search(message)
    if day_match is None:
        return None
    delivery_day = int(day_match.group(1))

    year, month = reply_day.year, reply_day.month
    if delivery_day < reply_day.day:
        # A day number earlier than the reply day cannot be in the past, so
        # it is taken to mean that day of the following month.
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)
    try:
        promise_day = reply_day.replace(year=year, month=month, day=delivery_day)
    except ValueError:
        # The day does not exist in the target month (e.g. "31号" in February).
        return None
    return (promise_day - reply_day).days


def extract_logistics_info(chat_log, *, agent_keywords=_DEFAULT_AGENT_KEYWORDS,
                           reference_year=None):
    """Extract logistics price, lead-time and location data from a chat log.

    Each line is expected to look like
    ``<username>(<id>) <M/D> <H:M:S> <message>``. Only lines whose username
    contains one of *agent_keywords* are inspected, and a line is reported
    only if at least one of price, delivery time, origin, destination,
    weight or volume was found in its message.

    Args:
        chat_log (str): Raw chat log text. Empty or None yields an empty list.
        agent_keywords: Substrings identifying customer-service usernames
            (a single string is treated as one keyword).
        reference_year (int): Year assumed for the year-less timestamps in
            the log; defaults to the current year. It only affects which
            calendar dates are considered valid (e.g. "2/29").

    Returns:
        list: One dict per reported line with the keys ``username``,
        ``datetime``, ``message``, ``price``, ``delivery_time``,
        ``delivery_promise``, ``from``, ``to``, ``weight`` and ``volume``.
        Fields that could not be extracted are None.
    """
    if not chat_log:
        return []
    if isinstance(agent_keywords, str):
        agent_keywords = (agent_keywords,)
    if reference_year is None:
        reference_year = datetime.now().year

    results = []
    for line in chat_log.splitlines():
        match = _LINE_RE.match(line)
        if match is None:
            # Blank lines and anything that is not a chat entry.
            continue
        username, date, clock, message = match.groups()
        if not any(keyword in username for keyword in agent_keywords):
            continue

        price = _first_match(_PRICE_RE, message)
        delivery_time = _first_match(_DELIVERY_RE, message)
        from_loc = _first_match(_FROM_RE, message, group=1)
        to_loc = _first_match(_TO_RE, message, group=1)
        weight = _first_match(_WEIGHT_RE, message)
        volume = _first_match(_VOLUME_RE, message)

        if not any((price, delivery_time, from_loc, to_loc, weight, volume)):
            continue

        days = _promised_days(message, date, clock, reference_year)
        delivery_promise = None if days is None else f"承诺时效: {days}天"

        results.append({
            'username': username.strip(),
            'datetime': f"{date} {clock}",
            'message': message.strip(),
            'price': price,
            'delivery_time': delivery_time,
            'delivery_promise': delivery_promise,
            'from': from_loc,
            'to': to_loc,
            'weight': weight,
            'volume': volume,
        })
    return results
# 使用示例:
# 假设 chat_log_text 是包含聊天记录的字符串
# extracted_data = extract_logistics_info(chat_log_text)
# for item in extracted_data:
#     print(item)
```