一、为什么我们总在深夜调试文件上传?

去年双十一大促前夕,我们的电商平台遭遇了一个诡异现象:商品视频上传功能在白天频繁失败,却在深夜运行顺畅。通宵排查之后,我们定位到三个关键问题:海量请求的排队拥堵、大文件传输的断连风险,以及服务器内存的瞬时压力。这让我意识到,文件上传并非一次简单的 POST 请求,而是一门需要精雕细琢的传输艺术。

二、分片上传:把大象装进冰箱的正确姿势

2.1 分片策略的三层盔甲

技术栈:Express + multer + crypto

完整的文件传输方案应当包含:

  1. 前端的分片处理
  2. 后端的碎片合并
  3. 校验机制的三重保障
/**
 * Front-end chunking logic (Vue 3 example).
 *
 * Cuts `file` into 2 MB slices and POSTs them to `/upload-chunk` one after
 * another, tagging every slice with the file fingerprint and its index.
 *
 * @param {Blob} file - File (or Blob) to upload; only `size` and `slice()` are used.
 * @returns {Promise<void>} Resolves after the last chunk request has completed;
 *   rejects with the first hashing or request error.
 */
const handleFileSplit = async (file) => {
  const chunkSize = 2 * 1024 * 1024 // 2 MB per chunk
  const totalChunks = Math.ceil(file.size / chunkSize)
  const fileHash = await calculateMD5(file) // fingerprint identifying this upload

  for (let i = 0; i < totalChunks; i++) {
    const chunk = file.slice(i * chunkSize, (i + 1) * chunkSize)
    const formData = new FormData()
    // Text fields must be appended BEFORE the file part: multipart bodies are
    // parsed in order, and the server's multer storage callbacks read
    // req.body.hash / req.body.index while the file part is being handled.
    formData.append('hash', fileHash)
    formData.append('index', i)
    formData.append('chunk', chunk)

    await axios.post('/upload-chunk', formData, {
      headers: {'Content-Type': 'multipart/form-data'}
    })
  }
}

/**
 * Computes the file fingerprint off the main thread by delegating to the
 * `/md5-worker.js` Web Worker.
 *
 * @param {Blob} file - File posted to the worker as-is.
 * @returns {Promise<*>} Resolves with whatever the worker posts back
 *   (presumably the MD5 hex digest — confirm against md5-worker.js).
 *   Rejects if the worker raises an error, instead of staying pending forever.
 */
const calculateMD5 = (file) => {
  return new Promise((resolve, reject) => {
    const worker = new Worker('/md5-worker.js')

    worker.onmessage = (e) => {
      worker.terminate() // one worker per call: release it once the answer arrived
      resolve(e.data)
    }
    worker.onerror = (event) => {
      worker.terminate()
      reject(new Error(`MD5 worker failed: ${event.message}`))
    }

    worker.postMessage(file)
  })
}
// Server-side chunk storage (Node.js core logic).
// Every chunk is written to uploads/<hash>/<index>.part, where `hash` and
// `index` are text fields of the same multipart request. They are only present
// in req.body if the client sent them before the file part.
const storage = multer.diskStorage({
  destination: (req, file, cb) => {
    const { hash } = req.body
    // `hash` is client input that becomes a directory name: accept only
    // word characters and hyphens so it cannot escape uploads/ (e.g. "../..").
    if (typeof hash !== 'string' || !/^[\w-]+$/.test(hash)) {
      return cb(new Error('Invalid upload hash'))
    }
    const chunkDir = `uploads/${hash}`
    // Recursive mkdir succeeds when the directory already exists, so parallel
    // chunk uploads of one file cannot fail on an exists-then-create race,
    // and a missing uploads/ parent is created as well.
    fs.mkdir(chunkDir, { recursive: true }, (err) => cb(err, chunkDir))
  },
  filename: (req, file, cb) => {
    const { index } = req.body
    // Only a plain non-negative integer is a valid chunk index.
    if (!/^\d+$/.test(String(index))) {
      return cb(new Error('Invalid chunk index'))
    }
    cb(null, `${index}.part`)
  }
})

/**
 * POST /merge — concatenates the chunks stored in uploads/<hash>/ into
 * completed/<filename>, in ascending chunk-index order.
 *
 * Request body: { hash, filename }.
 * Responses:
 *   200 { success: true }            merged file fully written
 *   400 { success: false, message }  unsafe hash/filename, or no chunks found
 *   404 { success: false, message }  no chunk directory for this hash
 *   500 { success: false, message }  I/O failure while merging
 */
app.post('/merge', async (req, res) => {
  const { hash, filename } = req.body

  // Both values are client input used to build file paths; refuse anything
  // that is not a single path segment (no separators, no "." / "..").
  const isSafeSegment = (value) =>
    typeof value === 'string' &&
    value.length > 0 &&
    value !== '.' &&
    value !== '..' &&
    !/[\\\/\0]/.test(value)

  if (!isSafeSegment(hash) || !isSafeSegment(filename)) {
    return res.status(400).json({ success: false, message: 'Invalid hash or filename' })
  }

  const chunkDir = `uploads/${hash}`

  let chunks
  try {
    chunks = await fs.promises.readdir(chunkDir)
  } catch (err) {
    const status = err.code === 'ENOENT' ? 404 : 500
    return res.status(status).json({ success: false, message: 'Chunks not found' })
  }

  // Keep only "<index>.part" files and order them numerically by index.
  chunks = chunks
    .filter((name) => /^\d+\.part$/.test(name))
    .sort((a, b) => parseInt(a, 10) - parseInt(b, 10))

  if (chunks.length === 0) {
    return res.status(400).json({ success: false, message: 'No chunks to merge' })
  }

  // Streams one chunk into the shared output without closing it.
  // Completion is signalled by the READ stream's 'end' event: the write
  // stream does not emit 'finish' until end() is called on it.
  const appendChunk = (chunkPath, target) =>
    new Promise((resolve, reject) => {
      const source = fs.createReadStream(chunkPath)
      const fail = (err) => {
        target.removeListener('error', fail)
        source.destroy()
        reject(err)
      }
      source.once('error', fail)
      target.once('error', fail)
      source.once('end', () => {
        target.removeListener('error', fail)
        resolve()
      })
      source.pipe(target, { end: false })
    })

  const writeStream = fs.createWriteStream(`completed/${filename}`)
  try {
    // Sequential on purpose: chunks must land in the output in index order.
    for (const chunk of chunks) {
      await appendChunk(`${chunkDir}/${chunk}`, writeStream)
    }

    // Respond only after the output has been flushed.
    await new Promise((resolve, reject) => {
      writeStream.once('error', reject)
      writeStream.end(resolve)
    })

    res.status(200).json({ success: true })
  } catch (err) {
    writeStream.destroy()
    res.status(500).json({ success: false, message: 'Merge failed' })
  }
})

三、断点续传:网络波动中的生存智慧

3.1 断点续传双保险机制

技术栈:Redis + 文件校验

// Resume endpoint: POST /check-chunk with { hash } answers
// { uploaded: number[] } — the chunk indexes the client may skip.
app.post('/check-chunk', async (req, res, next) => {
  try {
    const { hash } = req.body
    // `hash` is used both as Redis key and as directory name; reject anything
    // that is not a plain token.
    if (typeof hash !== 'string' || !/^[\w-]+$/.test(hash)) {
      return res.status(400).json({ uploaded: [] })
    }

    // NOTE(review): a client is obtained per request and never released here —
    // confirm createRedisClient() returns a shared/pooled connection.
    const redisClient = createRedisClient()

    // Indexes recorded in Redis for this upload
    const recorded = await redisClient.lRange(hash, 0, -1)
    const chunkDir = `uploads/${hash}`

    // Double check: an index counts only if it is recorded in Redis AND its
    // part file exists on disk. The Set drops duplicates, which appear when a
    // chunk is uploaded more than once (each success is pushed onto the list).
    const uploaded = [...new Set(recorded.map(Number))]
      .filter((index) => fs.existsSync(`${chunkDir}/${index}.part`))

    if (uploaded.length > 0) {
      return res.json({ uploaded })
    }

    // Nothing usable: reset the Redis record so the upload starts clean
    await redisClient.del(hash)
    res.json({ uploaded: [] })
  } catch (err) {
    // Forward failures to Express instead of leaving an unhandled rejection
    next(err)
  }
})

// Chunk upload endpoint: after multer has stored the file part, record the
// chunk index in the Redis list keyed by the file hash.
app.post('/upload-chunk', upload.single('chunk'), async (req, res, next) => {
  try {
    const { hash, index } = req.body

    // Without a stored file or its identifying fields there is nothing to
    // record; answer 400 instead of throwing on `index.toString()`.
    if (!req.file || typeof hash !== 'string' || hash === '' || index === undefined) {
      return res.status(400).json({ code: 1, message: 'Missing chunk, hash or index' })
    }

    const redisClient = createRedisClient()

    // Push the successful index onto the Redis list
    await redisClient.lPush(hash, String(index))
    await redisClient.expire(hash, 86400) // record expires after 24 hours

    res.status(200).json({ code: 0 })
  } catch (err) {
    // Forward failures to Express instead of leaving an unhandled rejection
    next(err)
  }
})

四、并发控制:释放处理能力的正确方式

4.1 并发调度器实战(固定并发上限的任务队列)

/**
 * Runs async tasks with a fixed upper bound on how many are in flight.
 *
 * Tasks beyond the limit wait in a FIFO queue; each time a task settles, the
 * next queued one starts. This is a concurrency limiter: it bounds
 * simultaneous work, not the request rate.
 */
class UploadScheduler {
  /**
   * @param {number} maxConcurrent - Maximum number of tasks running at once.
   * @throws {RangeError} If `maxConcurrent` is not a positive number. Without
   *   this check every task would be queued and never started.
   */
  constructor(maxConcurrent) {
    const limit = Number(maxConcurrent)
    if (!(limit > 0)) {
      throw new RangeError('maxConcurrent must be a positive number')
    }
    this.max = limit
    this.queue = []
    this.activeCount = 0
  }

  /**
   * Schedules a task.
   *
   * @param {Function} task - Zero-argument function; may return a promise.
   * @returns {Promise<*>} Settles with the task's own result or error.
   */
  addTask(task) {
    return new Promise((resolve, reject) => {
      const execute = async () => {
        this.activeCount++
        try {
          const result = await task()
          resolve(result)
        } catch (error) {
          reject(error)
        } finally {
          // Free the slot and hand it to the next waiting task, whether the
          // current one succeeded or failed.
          this.activeCount--
          this.runNext()
        }
      }

      if (this.activeCount < this.max) {
        execute()
      } else {
        this.queue.push(execute)
      }
    })
  }

  /** Starts the oldest queued task, if there is one. */
  runNext() {
    if (this.queue.length > 0) {
      const nextExecute = this.queue.shift()
      nextExecute()
    }
  }
}

// Usage example: push every file through a scheduler created with a limit of 3
const scheduler = new UploadScheduler(3)

files.forEach((file) => {
  const runUpload = () => uploadFile(file)
  const pending = scheduler.addTask(runUpload)

  pending
    .then(() => console.log('上传成功'))
    .catch(console.error)
})

五、性能调优的战场法则

5.1 实战经验总结

  • 内存控制:流式处理永远比Buffer缓存更安全
  • 异常熔断:当失败率超过阈值时自动降级
  • 集群部署:Nginx反向代理实现负载均衡
  • 监控预警:用 Elasticsearch 记录传输日志
// Transfer-log instrumentation example: once a response has finished, index
// one document describing the request into the 'upload-logs' index.
app.use((req, res, next) => {
  const start = Date.now()

  res.on('finish', () => {
    const logEntry = {
      method: req.method,
      url: req.url,
      status: res.statusCode,
      duration: Date.now() - start, // milliseconds between middleware entry and 'finish'
      timestamp: new Date()
    }

    // Logging is best-effort: a failed write to the log store must not
    // surface as an unhandled rejection (or a thrown error) in the server.
    const reportFailure = (err) => console.error('Failed to write upload log:', err)
    try {
      // Promise.resolve() covers clients that return a thenable or nothing.
      Promise.resolve(
        elasticClient.index({
          index: 'upload-logs',
          body: logEntry
        })
      ).catch(reportFailure)
    } catch (err) {
      reportFailure(err)
    }
  })

  next()
})

六、最佳适配场景剖析

  1. 医疗影像PACS系统传输
  2. 4K视频制作云端协作
  3. 工业设计三维模型同步
  4. 物联网设备海量日志回传