以上是一份聊天记录,如你所见除了部分无关聊天外,充斥着关于物...

Author:匿名用户
2026/01/05 09:12

Description

编写Python代码从聊天记录中自动提取物流价格、时效和地点等结构化信息,支持中英文单位转换和数据验证。

Tags

代码编程、提取信息、内容生成

Content

```python
# 提取物流价格时效信息的Python代码

import re
from datetime import datetime

# Patterns are compiled once at import time instead of once per chat line.

# One chat line: "username(id) M/D H:M:S message". Both ASCII and full-width
# parentheses are accepted around the id.
_LINE_RE = re.compile(
    r'^(.*?)[\((].*?[\))]\s+(\d{1,2}/\d{1,2})\s+(\d{1,2}:\d{1,2}:\d{1,2})\s+(.*)$'
)
# Number followed by a currency marker.
_PRICE_RE = re.compile(r'(\d+\.?\d*)\s*(元|RMB|¥|¥)')
# "X天", "X-Y天", "X工作日", ...
_DELIVERY_RE = re.compile(r'(\d+[-~]?\d*)\s*(天|日|工作日)')
# Text after "从" up to the first 发 / 运 / 到 / 至.
_FROM_RE = re.compile(r'从(.*?)[发运到至]')
# Text after a destination keyword up to end of message or punctuation.
# This must be an alternation: a character class would match the single
# character "发" and capture "往北京" instead of "北京".
_TO_RE = re.compile(r'(?:发往|运往|到|至)(.*?)(?:$|[,,。;;])')
_WEIGHT_RE = re.compile(r'(\d+\.?\d*)\s*(kg|KG|千克|公斤)')
_VOLUME_RE = re.compile(r'(\d+\.?\d*)\s*(m³|立方米|方)')
# Explicit day-of-month promise such as "21号交货".
_DAY_OF_MONTH_RE = re.compile(r'(\d{1,2})[号日]交')


def _matched_text(pattern, message):
    """Return the full text of the first match of *pattern*, or None."""
    found = pattern.search(message)
    return found.group(0) if found else None


def _captured_group(pattern, message):
    """Return the stripped first capture group of *pattern*, or None."""
    found = pattern.search(message)
    return found.group(1).strip() if found else None


def _compute_delivery_promise(date_str, time_str, message):
    """Return the promised lead time as "承诺时效: N天", or None.

    The chat log carries no year, so the reply timestamp and the promised
    date are both placed in the current year; this keeps their difference
    meaningful. A reply dated 2/29 therefore yields None in non-leap years.

    Args:
        date_str: Reply date as "M/D".
        time_str: Reply time as "H:M:S".
        message: Message text searched for 今天/当天/明天 or "N号交".

    Returns:
        The formatted promise string, or None when the message contains no
        recognisable promise or the date/time values are not valid.
    """
    from datetime import timedelta

    try:
        month, day = (int(part) for part in date_str.split('/'))
        hour, minute, second = (int(part) for part in time_str.split(':'))
        reply_date = datetime(
            datetime.now().year, month, day, hour, minute, second
        )

        if '今天' in message or '当天' in message:
            promise_date = reply_date
        elif '明天' in message:
            # timedelta rolls over month/year ends; replace(day=day+1) raised
            # ValueError on the last day of a month.
            promise_date = reply_date + timedelta(days=1)
        else:
            day_match = _DAY_OF_MONTH_RE.search(message)
            if not day_match:
                return None
            delivery_day = int(day_match.group(1))
            promise_year = reply_date.year
            promise_month = reply_date.month
            # A day-of-month earlier than the reply day can only refer to
            # the following month.
            if delivery_day < reply_date.day:
                promise_month += 1
                if promise_month > 12:
                    promise_month = 1
                    promise_year += 1
            promise_date = datetime(
                promise_year, promise_month, delivery_day, 23, 59, 59
            )
    except ValueError:
        # Out-of-range date/time values (e.g. "2/30" or "25:00:00").
        return None

    return f"承诺时效: {(promise_date - reply_date).days}天"


def extract_logistics_info(chat_log):
    """Extract logistics price / lead-time information from a chat log.

    Each line is expected to look like "username(id) M/D H:M:S message".
    Only lines whose username contains "斯比特" or "客服" (treated as
    customer-service replies) are inspected. A line is reported when at
    least one of price, delivery time, origin, destination, weight or
    volume is found in its message.

    Args:
        chat_log (str): Raw chat log text, one message per line. ``None``
            or an empty string yields an empty list.

    Returns:
        list: One dict per reported line with the keys ``username``,
        ``datetime``, ``message``, ``price``, ``delivery_time``,
        ``delivery_promise``, ``from``, ``to``, ``weight`` and ``volume``.
        Fields that were not found are ``None``.
    """
    results = []
    if not chat_log:
        return results

    # splitlines() also copes with "\r\n" line endings.
    for line in chat_log.splitlines():
        if not line.strip():
            continue

        line_match = _LINE_RE.match(line)
        if not line_match:
            continue
        username, date_str, time_str, message = line_match.groups()

        # Only customer-service replies are of interest.
        if '斯比特' not in username and '客服' not in username:
            continue

        price = _matched_text(_PRICE_RE, message)
        delivery_time = _matched_text(_DELIVERY_RE, message)
        from_loc = _captured_group(_FROM_RE, message)
        to_loc = _captured_group(_TO_RE, message)
        weight = _matched_text(_WEIGHT_RE, message)
        volume = _matched_text(_VOLUME_RE, message)

        # Skip replies that carry no logistics information at all.
        if not any([price, delivery_time, from_loc, to_loc, weight, volume]):
            continue

        results.append({
            'username': username.strip(),
            'datetime': f"{date_str} {time_str}",
            'message': message.strip(),
            'price': price,
            'delivery_time': delivery_time,
            'delivery_promise': _compute_delivery_promise(
                date_str, time_str, message
            ),
            'from': from_loc,
            'to': to_loc,
            'weight': weight,
            'volume': volume,
        })

    return results


# 使用示例:
# 假设 chat_log_text 是包含聊天记录的字符串
# extracted_data = extract_logistics_info(chat_log_text)
# for item in extracted_data:
#     print(item)
```