樱境物语 protobuf 分析

随着 nutaku 美洲服 1.0.0 版本开服，樱境物语的所有版本都改成了在线获取文件清单的方式。

5 月份的一次更新后，index.txt 文件不再打包在安装包中。
当时我通过抓包并修改数据包的 16 进制内容来替换版本号，可以获取指定版本的资源清单。
不过众所周知，nutaku 一直都在 NS 的游戏上搞特殊，例如把《潘吉亚异闻录》的小雷全部换成大雷。
在分析 nutaku 的 index 请求数据时，我发现它和 erolab 使用了同一个域名，不过数据包结构上稍有不同。
初步分析，这个数据包里用到了 varint 编码，明显是 protobuf 编码后的产物。
尝试解析（没有 .proto 定义，只能靠猜）后发现，其中包含几个常量、时间戳、版本号以及平台、语言等数据。
例如（erolab 的数据包解析）：

import binascii

# Captured erolab index-request payload as a hex string (89 bytes).
# Decoded layout: field 1 (varint 99015), field 3 ("-1"), field 10 (over-long
# all-ones varint), field 12 (millisecond timestamp), field 14 (32-character
# hex ID) and field 99015 (nested message holding the version and language).
# The chunks below are plain line wraps and do not align with field borders.
hex_data = "".join(
    [
        "08c785061a022d3150ffffffffffffffffff",
        "ffff0160a6f1c5c5903372203243313344",
        "3445414433343931464231414439334332",
        "41424130333033434533baac3017080212",
        "0d312e372e302d656c2d682d616e1a047a",
        "68636e",
    ]
)

# Raw bytes of the payload, ready to be fed to the parser.
data = binascii.unhexlify(hex_data)

def read_varint(buf, offset):
    """Decode one base-128 varint from ``buf`` starting at ``offset``.

    Every byte contributes its low 7 bits, least-significant group first; a
    set high bit (0x80) means another byte follows. The encoded length is not
    capped, so an over-long varint simply decodes to a larger Python int.

    Returns a ``(value, next_offset)`` tuple, where ``next_offset`` points at
    the first byte after the varint. Running off the end of ``buf`` propagates
    whatever indexing ``buf`` raises (``IndexError`` for ``bytes``).
    """
    value = 0
    bit_pos = 0

    byte = buf[offset]
    offset += 1
    while byte & 0x80:
        # Continuation byte: merge its payload and move on to the next one.
        value |= (byte & 0x7F) << bit_pos
        bit_pos += 7
        byte = buf[offset]
        offset += 1

    # Final byte (high bit clear) carries the most significant group.
    value |= (byte & 0x7F) << bit_pos
    return value, offset

def parse_protobuf(data):
    """Split a protobuf-encoded buffer into its top-level fields, schema-less.

    Only wire type 0 (varint) and wire type 2 (length-delimited) are decoded.
    On any other wire type parsing stops and the fields collected so far are
    returned; the remainder of the buffer is ignored.

    Returns a list of ``(field_number, wire_type, value)`` tuples in stream
    order. ``value`` is an int for varints and a slice of ``data`` for
    length-delimited fields. A declared length that runs past the end of
    ``data`` yields a shorter slice rather than an error.
    """
    parsed = []
    pos = 0
    end = len(data)

    while pos < end:
        # The key varint packs the field number above a 3-bit wire type.
        tag, pos = read_varint(data, pos)
        field_number, wire_type = tag >> 3, tag & 0x07

        if wire_type == 0:
            value, pos = read_varint(data, pos)
        elif wire_type == 2:
            size, pos = read_varint(data, pos)
            stop = pos + size
            value = data[pos:stop]
            pos = stop
        else:
            # Unsupported wire type: give up and return what was parsed.
            return parsed

        parsed.append((field_number, wire_type, value))

    return parsed

fields = parse_protobuf(data)

# Print the results: varints as numbers, length-delimited fields as text when
# they are valid UTF-8 and as a hex dump otherwise.
for num, wire_type, value in fields:
    if not isinstance(value, bytes):
        print(f"Field {num} (varint): {value}")
        continue

    try:
        decoded = value.decode('utf-8')
    except UnicodeDecodeError:
        print(f"Field {num} (len-delimited HEX): {value.hex()}")
    else:
        print(f"Field {num} (len-delimited): {decoded}")

Field 1 (varint): 99015
Field 3 (len-delimited): -1
Field 10 (varint): 302231454903657293676543
Field 12 (varint): 1756787472550
Field 14 (len-delimited): 2C13D4EAD3491FB1AD93C2ABA0303CE3
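1.7.0-el-h-anzhcn
（注：上面这一行其实是 Field 99015 的输出。该字段是一个嵌套消息，它的原始字节恰好能按 UTF-8 解码，被脚本当成文本直接打印；其中夹着 \x08、\x12、\r、\x1a、\x04 等控制字符，应该是 \r 把前面的 "Field 99015 (len-delimited): " 前缀冲掉了。）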

可以注意到，Field 12 实际上是一个毫秒级的 Unix 时间戳。
有没有大佬可以逆向出它的 protobuf 结构？
我尝试逆向它的安卓 so 文件，发现是加固过的，或许只能从 WebGL 版下手分析。

\x50\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01

这段 protobuf（字段 10，键为 0x50）的 varint 值占了 12 个字节，超过了 64 位 varint 最多 10 个字节的上限，不知道要怎么支持。

把这一段去掉之后，用 protodeep 可以得到：

[1]  1 -> <int> = 99015 [1]
[1]  3 -> <string> = "-1" [3]
[1]  12 -> <int> = 1756787472550 [12]
[1]  14 -> <string> = "2C13D4EAD3491FB1AD93C2ABA0303CE3" [14]
[2]  99015,1 -> <int> = 2 [99015,1]
[2]  99015,2 -> <string> = "1.7.0-el-h-an" [99015,2]
[2]  99015,3 -> <string> = "zhcn" [99015,3]

没什么特别的

cpdef bytes build_protobuf(version: bytes):
    """Build the index-request payload for the given client version string.

    The message is assembled by hand from tag/value pairs:

    * outer field 1 (varint): 99015
    * outer field 3 (length-delimited): b"-1"
    * outer field 10 (varint): 18446744073709551615 (2**64 - 1)
    * outer field 12 (varint): current time in milliseconds
    * outer field 14 (length-delimited): fixed 32-character hex ID
    * outer field 99015 (length-delimited): nested message with
      field 1 = 2, field 2 = ``version``, field 3 = b"zhcn"

    Each tag is ``(field_number << 3) | wire_type``. ``encode_varint``,
    ``encode_length_delimited`` and ``pytime`` are defined elsewhere in the
    module (``pytime`` is presumably the ``time`` module -- confirm).
    """
    # Timestamp is taken before anything is encoded.
    cdef long long timestamp = int(pytime.time() * 1000)

    # Nested message carried in outer field 99015.
    cdef bytes inner = b"".join([
        encode_varint((1 << 3) | 0), encode_varint(2),
        encode_varint((2 << 3) | 2), encode_length_delimited(version),
        encode_varint((3 << 3) | 2), encode_length_delimited(b"zhcn"),
    ])

    # NOTE(review): the field 10 value is deliberately still built with int()
    # from a string, exactly as before -- possibly to keep Cython from typing
    # it as a C literal; confirm before simplifying.
    cdef bytes outer = b"".join([
        encode_varint((1 << 3) | 0), encode_varint(99015),
        encode_varint((3 << 3) | 2), encode_length_delimited(b"-1"),
        encode_varint((10 << 3) | 0),
        encode_varint(int("18446744073709551615")),
        encode_varint((12 << 3) | 0), encode_varint(timestamp),
        encode_varint((14 << 3) | 2),
        encode_length_delimited(b"2C13D4EAD3491FB1AD93C2ABA0303CE3"),
        encode_varint((99015 << 3) | 2), encode_length_delimited(inner),
    ])
    return outer

这样似乎是没啥问题了

服务器返回的响应数据（十六进制）如下：

08c885068201440a2035364345463041423344463936303439324635323034433546394641324546321220463637344630423646463636303734363339354245433334394643383431343692ac30140801080108010801080108010801080108010800c2ac309b040800121667616d652d63742d6c6162732e65636368692e78787818e00e220fe7b3bbe7bb9fe7bbb4e68aa4e4b8ad280132cd027b225775446f75505650223a202231222c2022646f776e6c6f6164223a202268747470733a2f2f6c2e6879656e61646174612e636f6d2f732f32373049756c222c2022505645566572696679223a202231222c2022505650566572696679223a202231222c202262616e6e657243646e223a202268747470733a2f2f677a74712e67656767642e636f6d222c202263646e5f67726f7570223a202231222c2022646e735f67726f7570223a202233222c202259696d6f426174746c65223a202231222c20224e6f746966636174696f6e223a202231222c202255736548747470466163746f7279223a202230222c2022616e616c79736973537461747573223a202231222c202245726f6c61627350726f6d6f74696f6e55726c223a202268747470733a2f2f7777772e65726f2d6c6162732e636f6d2f70726f66696c652d70726f6d6f74652e68746d6c227d3a203031633731363166303161393636383763326531363932616530303461623938421667616d652d63742d6c6162732e65636368692e78787848e50e521768747470733a2f2f686566632e6872627a71792e636f6d521f68747470733a2f2f70617463682d63742d6c6162732e65636368692e7878785a07382e382e382e38621667616d652d63742d6c6162732e65636368692e78787868ec0e

可以用下面的 Cython 代码来解析：

from libc.stdint cimport uint64_t, uint32_t, uint8_t

cdef tuple decode_varint(bytes data, Py_ssize_t offset):
    """Decode one base-128 varint from ``data`` starting at ``offset``.

    Returns ``(value, next_offset)``, where ``value`` is the varint truncated
    to 64 bits and ``next_offset`` is the index of the first byte after it.

    Over-long encodings (more than 10 bytes, as seen in the captured request)
    are tolerated: the extra bytes are consumed, but bits beyond the 64th are
    dropped.

    Raises ``IndexError`` if the data ends before the varint terminates.
    """
    cdef uint64_t result = 0
    cdef int shift = 0
    cdef uint8_t b
    cdef Py_ssize_t idx = offset
    cdef Py_ssize_t size = len(data)

    while True:
        # Explicit end-of-buffer check so truncated input fails the same way
        # regardless of the ``boundscheck`` compiler directive.
        if idx >= size:
            raise IndexError("truncated varint: ran past the end of the data")
        b = data[idx]
        idx += 1
        # Widen to uint64_t BEFORE shifting: otherwise the shift is evaluated
        # in a narrower signed C integer type and overflows for larger shift
        # counts (e.g. millisecond timestamps). Shifting by 64 or more is
        # undefined in C, so groups past bit 63 are skipped.
        if shift < 64:
            result |= (<uint64_t>(b & 0x7F)) << shift
        if b < 0x80:
            break
        shift += 7

    return result, idx

cpdef dict decode_protobuf(bytes data, Py_ssize_t offset=0):
    """Decode a protobuf message without a schema, starting at ``offset``.

    Returns a dict mapping each field number to the list of values seen for
    it, in stream order (repeated fields yield several entries).

    * Wire type 0 (varint) values are stored as ints.
    * Wire type 2 (length-delimited) payloads are guessed: valid UTF-8 is
      stored as ``str``; otherwise the payload is tried as a nested message
      and stored as a ``dict``; if that fails too, or yields nothing, the raw
      ``bytes`` are stored.

    Raises ``ValueError`` for any other wire type, or when a length-delimited
    field claims more bytes than remain in ``data``.
    """
    cdef dict result = {}
    cdef uint64_t key, field_number
    cdef uint32_t wire_type
    cdef uint64_t value
    cdef uint64_t length
    cdef bytes raw
    cdef Py_ssize_t idx = offset
    cdef Py_ssize_t size = len(data)
    cdef list lst
    cdef object item
    cdef object nested

    while idx < size:
        # The key varint packs the field number above a 3-bit wire type.
        key, idx = decode_varint(data, idx)
        field_number = key >> 3
        wire_type = key & 0x07

        if wire_type == 0:  # varint
            value, idx = decode_varint(data, idx)
            item = value

        elif wire_type == 2:  # length-delimited
            length, idx = decode_varint(data, idx)
            # Reject lengths that overrun the buffer instead of silently
            # slicing short. This also keeps the nested-message guess below
            # from "succeeding" on arbitrary binary data.
            if length > <uint64_t>(size - idx):
                raise ValueError(
                    f"length-delimited field {field_number} overruns the data"
                )
            raw = data[idx:idx + <Py_ssize_t>length]
            idx += <Py_ssize_t>length
            try:
                item = raw.decode("utf-8")
            except UnicodeDecodeError:
                # Not text: it may be a nested message or an opaque blob.
                # A failed guess must not abort the outer parse.
                try:
                    nested = decode_protobuf(raw, 0)
                except (ValueError, IndexError):
                    nested = None
                item = nested if nested else raw

        else:
            raise ValueError(f"未知 wire_type: {wire_type}")

        lst = result.setdefault(field_number, [])
        lst.append(item)

    return result

cpdef dict parse_response(str hex_str):
    """Decode a hex-encoded protobuf response into a field dictionary.

    ``hex_str`` is converted to bytes with ``bytes.fromhex`` and handed to
    ``decode_protobuf`` starting at offset 0; its result is returned as-is.
    """
    return decode_protobuf(bytes.fromhex(hex_str), 0)

cpdef object extract_field(dict parsed, int field_number):
    """Look up ``field_number`` in a parsed-message dict.

    Returns whatever is stored under that key in ``parsed``, or ``None`` when
    the key is absent.
    """
    # dict.get already defaults to None for a missing key.
    return parsed.get(field_number)

顺带一提，erolab 今天才发公告说 9/10 更新 1.8.0，而用这个方法构造请求数据，已经可以拿到 1.8.0 的资源 index 了
（其实一个星期前就能拿到了）。