DMM ミストトレインガールズ〜霧の世界の車窓から〜 X 抓包求助

网页版 URL
import os
import json
import re

def _collect_still_config_urls(data, target_key, start_str, url_template):
    """Return the still_configuration.json URLs described by one parsed catalog.

    Args:
        data: The object produced by ``json.load`` for one catalog file.
        target_key: Key whose value is expected to be a list of path strings.
        start_str: Only list entries starting with this prefix are used.
        url_template: ``str.format`` template with ``{character}`` and
            ``{number}`` placeholders.

    Returns:
        A list of formatted URLs, in the order the entries appear in the
        catalog. Empty when the key is missing or its value is not a list.
    """
    if target_key not in data or not isinstance(data[target_key], list):
        return []

    urls = []
    for item in data[target_key]:
        # Non-string entries and entries without the expected prefix are ignored.
        if not (isinstance(item, str) and item.startswith(start_str)):
            continue

        path_parts = item.split('/')
        # The last two path segments are needed to fill the template.
        if len(path_parts) < 2:
            continue

        # A second-to-last segment of "Main" is replaced by "0" in the URL.
        character = "0" if path_parts[-2] == "Main" else path_parts[-2]
        number = path_parts[-1]
        urls.append(url_template.format(character=character, number=number))
    return urls


def extract_still_config_urls(directory='.', output_file="output_still_configuration.txt"):
    """Build still_configuration.json URLs from catalog JSON files and save them.

    Every ``*.json`` file in ``directory`` whose name contains
    ``catalog_ex_hd`` is parsed. For each string in its
    ``m_InternalIdPrefixes`` list that starts with ``Assets``, the last two
    ``/``-separated segments are substituted into the URL template. All URLs
    are then written to ``output_file``, one per line.

    Files that cannot be read or parsed are reported on stdout and skipped, so
    one bad catalog does not abort the whole run.

    Args:
        directory: Directory to scan for catalog files. Defaults to the
            current working directory.
        output_file: Path of the text file that receives the URLs.

    Returns:
        None. The result is the written file plus progress messages on stdout.
    """
    # Configuration
    search_str = "catalog_ex_hd"
    target_key = "m_InternalIdPrefixes"
    start_str = "Assets"
    url_prefix_base = "https://assets4.mist-train-girls.com/production-client-web-assets/Spines/Stills/{character}/{number}/still_configuration.json"

    # Every URL extracted from every catalog file
    all_urls = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # output file reproducible from run to run.
    for filename in sorted(os.listdir(directory)):
        if search_str not in filename or not filename.endswith('.json'):
            continue

        print(f"处理文件: {filename}")

        try:
            with open(os.path.join(directory, filename), 'r', encoding='utf-8') as f:
                data = json.load(f)
            all_urls.extend(
                _collect_still_config_urls(data, target_key, start_str, url_prefix_base)
            )
        except json.JSONDecodeError:
            print(f"警告: {filename} 不是有效的JSON文件,已跳过")
        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the
            # next catalog instead of aborting the run.
            print(f"处理 {filename} 时出错: {e}")

    # Write the collected URLs, one per line
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(url + '\n' for url in all_urls)

    print(f"处理完成,共提取 {len(all_urls)} 个URL,已保存到 {output_file}")

# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    extract_still_config_urls()

上面的脚本可以提取出 still_configuration.json 的 URL。其中有 4 个 URL 失败了,它们的格式与其他 URL 不同。