网页版 URL 提取脚本
import os
import json
import re
def extract_still_config_urls():
    """Generate still_configuration.json URLs from catalog files in the current directory.

    Every ``*.json`` file in the current working directory whose name contains
    ``catalog_ex_hd`` is parsed. For each string in the list stored under its
    ``m_InternalIdPrefixes`` key that starts with ``Assets``, the last two
    ``/``-separated path components are substituted into the URL template as
    ``{character}`` and ``{number}``; a second-to-last component equal to
    ``Main`` is replaced by ``0``.

    The URLs are written one per line to ``output_still_configuration.txt``
    (overwriting any existing file) and a summary line is printed. Files that
    cannot be read or parsed are reported on stdout and skipped, as are files
    whose top-level JSON value is not an object holding a list under the key.

    Returns:
        None.
    """
    # Configuration.
    search_str = "catalog_ex_hd"
    target_key = "m_InternalIdPrefixes"
    start_str = "Assets"
    url_prefix_base = "https://assets4.mist-train-girls.com/production-client-web-assets/Spines/Stills/{character}/{number}/still_configuration.json"
    output_file = "output_still_configuration.txt"

    # Every URL extracted so far, in processing order.
    all_urls = []

    # os.listdir() returns entries in arbitrary order; sort so that the
    # output file is reproducible from run to run.
    for filename in sorted(os.listdir('.')):
        if search_str not in filename or not filename.endswith('.json'):
            continue
        print(f"处理文件: {filename}")

        # Keep the try body limited to the read/parse step that can fail.
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except json.JSONDecodeError:
            print(f"警告: {filename} 不是有效的JSON文件,已跳过")
            continue
        except (OSError, UnicodeDecodeError) as e:
            print(f"处理 {filename} 时出错: {e}")
            continue

        # The key lookup only makes sense on a JSON object; any other
        # top-level value (list, number, ...) has nothing to extract.
        prefixes = data.get(target_key) if isinstance(data, dict) else None
        if not isinstance(prefixes, list):
            continue

        for item in prefixes:
            if not (isinstance(item, str) and item.startswith(start_str)):
                continue
            path_parts = item.split('/')
            # Both the second-to-last and the last component are required.
            if len(path_parts) < 2:
                continue
            character, number = path_parts[-2:]
            # A "Main" directory is addressed as character "0" in the URL.
            if character == "Main":
                character = "0"
            all_urls.append(
                url_prefix_base.format(character=character, number=number)
            )

    # Write the result, one URL per line.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{url}\n" for url in all_urls)
    print(f"处理完成,共提取 {len(all_urls)} 个URL,已保存到 {output_file}")
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    extract_still_config_urls()
该脚本可以提取出 still_configuration.json 的 URL。其中有 4 个失败了,它们的格式与其他 URL 不同。