Rust 初学 之 库fs、anyhow、csv、serde、serde_json
摘要:`ensure!` 宏用于条件断言,例如 `ensure!(b != 0.0, "除数不能为零");`;`bail!` 宏用于立即返回错误,例如 `bail!("输入包含 NaN: a={}, b={}", a, b);`;`anyhow!` 宏用于创建带上下文的错误,例如 `anyhow!("无效端口号 '{}': {}", s, e)` 和 `anyhow!("端口号 {} 为系统保留端口", port)`。
·
目录
`create_dir` / `create_dir_all`
`std::fs` 所有操作返回 `std::io::Result<T>`,即 `Result<T, std::io::Error>`
2.1 `anyhow::Result` 与 `Context` 链式处理
一、Rust 文件系统操作:`std::fs`
1.1 文件读写基础
-
`read_to_string` - 一次性读取整个文件
use std::fs;
// Simplest option: reads the whole file into one String, so mind memory use on large files.
let content = fs::read_to_string("data.txt")?;
println!("文件内容: {}", content);
// The `?` above is shorthand for:
let content = match fs::read_to_string("data.txt") {
Ok(c) => c,
Err(e) => return Err(e.into()),
};
适用场景:
配置文件、小文本文件(<100MB)
注意事项:
大文件会导致内存溢出,应使用流式读取
-
`write` - 快速写入文件
use std::fs;
// Overwrites the target: the file is created if missing and truncated otherwise.
fs::write("output.txt", "Hello, Rust!")?;
// Byte buffers can be written the same way as strings.
let data = vec![0u8; 1024];
fs::write("binary.dat", &data)?;
特点:
- 非原子操作:相当于 `File::create` 后调用 `write_all`,中途失败可能留下不完整的文件(需要原子性时应先写临时文件再重命名,见 2.3 的 `atomic_write`)
- 完全覆盖原有内容
- 自动处理文件创建和权限
-
流式读写(大文件推荐)
use std::fs::File;
use std::io::{BufRead, BufReader, Write, BufWriter};
// Read a large file through a buffer.
let file = File::open("large.log")?;
let reader = BufReader::new(file);
for line in reader.lines() {
let line = line?;
// Handle one line at a time; only the current line is held in memory.
}
// Write through a buffer to cut down on system calls.
let file = File::create("output.txt")?;
let mut writer = BufWriter::new(file);
writer.write_all(b"缓冲写入,减少系统调用")?;
writer.flush()?; // hands the buffered bytes to the OS and surfaces write errors; call File::sync_all when the data must reach the disk
1.2 目录操作
-
`create_dir` / `create_dir_all`
use std::fs;
// Create a single directory level; the parent directory must already exist.
fs::create_dir("new_folder")?;
// Create the directory together with any missing parents (recommended).
fs::create_dir_all("path/to/nested/folder")?;
-
`read_dir` - 遍历目录
use std::fs;
// Basic traversal: print each entry's full path next to its bare file name.
for item in fs::read_dir(".")? {
    let item = item?;
    let (full_path, file_name) = (item.path(), item.file_name());
    println!("{:?} - {}", full_path, file_name.to_string_lossy());
}
// Practical pattern: keep only `.rs` files; entries that fail to read are skipped.
let rust_files: Vec<_> = fs::read_dir("./src")?
    .flatten()
    .map(|item| item.path())
    .filter(|candidate| matches!(candidate.extension(), Some(ext) if ext == "rs"))
    .collect();
1.3 文件元数据与权限
`metadata` - 获取文件信息
use std::fs;
let metadata = fs::metadata("file.txt")?;
// Basic facts about the entry.
println!("大小: {} bytes", metadata.len());
println!("是否为文件: {}", metadata.is_file());
println!("是否为目录: {}", metadata.is_dir());
// Timestamps are platform dependent, which is why each accessor is fallible.
println!("修改时间: {:?}", metadata.modified()?);
println!("访问时间: {:?}", metadata.accessed()?);
println!("创建时间: {:?}", metadata.created()?);
// Permission bits (Unix only).
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let mode = metadata.permissions().mode();
// Mask with 0o777 to keep only the rwx bits for user/group/other.
println!("权限: {:o}", mode & 0o777);
}
修改权限
use std::fs;
// Mark the file read-only: fetch the current permissions, flip the flag, write them back.
let mut permissions = fs::metadata("file.txt")?.permissions();
permissions.set_readonly(true);
fs::set_permissions("file.txt", permissions)?;
// Unix only: set the mode to rwxr-xr-x (0o755).
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let mut permissions = fs::metadata("script.sh")?.permissions();
permissions.set_mode(0o755);
fs::set_permissions("script.sh", permissions)?;
}
1.4 错误处理:`std::io::Result`
`std::fs` 所有操作返回 `std::io::Result<T>`,即 `Result<T, std::io::Error>`
use std::io::{self, ErrorKind};
// Branch on the error kind: a missing file falls back to a default config text,
// while a permission problem or any other error kind panics.
match fs::read_to_string("config.toml") {
Ok(content) => content,
Err(e) => match e.kind() {
ErrorKind::NotFound => {
eprintln!("文件不存在,使用默认配置");
String::from("# 默认配置")
}
ErrorKind::PermissionDenied => {
panic!("权限不足: {}", e)
}
_ => panic!("未知错误: {}", e),
},
}
常见 `ErrorKind`:
- `NotFound` - 文件不存在
- `PermissionDenied` - 权限不足
- `AlreadyExists` - 文件已存在(创建时)
- `InvalidInput` - 参数无效;`InvalidData` - 数据格式错误(例如读取到非法 UTF-8)
- `UnexpectedEof` - 意外文件结束
二、错误处理简化:`anyhow`
2.1 `anyhow::Result` 与 `Context` 链式处理
use anyhow::{Result, Context};
// 函数签名简化:无需定义复杂错误枚举
fn process_file(path: &str) -> Result<String> {
// 每一步自动转换错误类型,并添加上下文
let content = fs::read_to_string(path)
.with_context(|| format!("读取文件失败: {}", path))?;
let data: Config = toml::from_str(&content)
.context("解析 TOML 配置失败")?;
validate_config(&data)
.context("配置验证失败")?;
Ok(content)
}
/// Entry point: processes `app.toml` and prints the result, wrapping any failure
/// in an application-level context message.
fn main() -> Result<()> {
    process_file("app.toml")
        .context("应用初始化失败")
        .map(|text| println!("处理结果: {}", text))
}
输出示例:
Error: 应用初始化失败

Caused by:
0: 读取文件失败: app.toml
1: No such file or directory (os error 2)
2.2 自定义错误与 `anyhow!` 宏
use anyhow::{anyhow, bail, ensure, Result};
/// Divides `a` by `b`.
///
/// # Errors
/// Fails when `b` is zero or when either operand is NaN.
fn divide(a: f64, b: f64) -> Result<f64> {
    // `ensure!` returns early with the message when its condition is false.
    ensure!(b != 0.0, "除数不能为零");

    // `bail!` always returns early, so it sits behind the NaN test.
    let has_nan = [a, b].iter().any(|operand| operand.is_nan());
    if has_nan {
        bail!("输入包含 NaN: a={}, b={}", a, b);
    }

    let quotient = a / b;
    Ok(quotient)
}
/// Parses `s` as a port number and rejects ports below 1024.
///
/// # Errors
/// Fails when `s` is not a valid `u16` or when the port is below 1024.
fn parse_port(s: &str) -> Result<u16> {
    let port = match s.parse::<u16>() {
        Ok(value) => value,
        // `anyhow!` builds an ad-hoc error carrying both the input and the parse failure.
        Err(e) => return Err(anyhow!("无效端口号 '{}': {}", s, e)),
    };

    if port >= 1024 {
        Ok(port)
    } else {
        Err(anyhow!("端口号 {} 为系统保留端口", port))
    }
}
2.3 与 `std::fs` 结合实现健壮操作
use anyhow::{Result, Context};
use std::fs::{self, File};
use std::io::Write;
use std::path::Path;
/// 安全写入:先写临时文件,再原子重命名
pub fn atomic_write(path: &Path, content: &[u8]) -> Result<()> {
// 创建临时文件
let temp_path = path.with_extension("tmp");
let mut temp_file = File::create(&temp_path)
.with_context(|| format!("创建临时文件失败: {:?}", temp_path))?;
temp_file.write_all(content)
.context("写入临时文件失败")?;
// 确保数据落盘
temp_file.sync_all()
.context("同步文件到磁盘失败")?;
drop(temp_file); // 关闭文件句柄
// 原子重命名(覆盖目标文件)
fs::rename(&temp_path, path)
.with_context(|| format!("重命名失败: {:?} -> {:?}", temp_path, path))?;
Ok(())
}
/// Recursively copies the directory `src` into `dst`, creating `dst` if needed.
///
/// # Errors
/// Stops at the first failure and reports which path was being handled.
pub fn copy_dir_all(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
    let (src_dir, dst_dir) = (src.as_ref(), dst.as_ref());

    fs::create_dir_all(dst_dir)
        .with_context(|| format!("创建目标目录失败: {:?}", dst_dir))?;

    let entries = fs::read_dir(src_dir)
        .with_context(|| format!("读取源目录失败: {:?}", src_dir))?;

    for entry in entries {
        let entry = entry.context("读取目录项失败")?;
        let from = entry.path();
        let to = dst_dir.join(entry.file_name());

        if !from.is_dir() {
            fs::copy(&from, &to)
                .with_context(|| format!("复制文件失败: {:?} -> {:?}", from, to))?;
            continue;
        }
        // Directories are handled by recursing into them.
        copy_dir_all(&from, &to)
            .with_context(|| format!("复制子目录失败: {:?} -> {:?}", from, to))?;
    }
    Ok(())
}
三、CSV 数据处理:`csv` 库
3.1 读取 CSV:Reader 与反序列化
-
基础读取
use csv::Reader;
use std::fs::File;
/// Prints the header row and then every record of the CSV file at `path`.
///
/// # Errors
/// Fails if the file cannot be opened or a row cannot be read.
fn read_csv_basic(path: &str) -> anyhow::Result<()> {
    let mut reader = Reader::from_reader(File::open(path)?);

    // Clone the header record so the reader is free to be borrowed again below.
    let header_row = reader.headers()?.clone();
    println!("表头: {:?}", header_row);

    // Each row is a string record; individual fields are reachable by index.
    reader.records().try_for_each(|row| -> anyhow::Result<()> {
        println!("{:?}", row?);
        Ok(())
    })
}
-
反序列化到结构体(推荐)
use csv::Reader;
use serde::Deserialize;
use std::fs::File;
/// One user row, deserialized from CSV through serde.
#[derive(Debug, Deserialize)]
struct User {
id: u32,
name: String,
email: String,
#[serde(default)] // use the default value when the field is missing
age: Option<u8>,
#[serde(rename = "created_at")] // the input names this field `created_at`
created: String,
}
/// Loads every row of the CSV at `path` that deserializes into a `User`.
///
/// Rows that fail to deserialize are dropped silently, so the result can be
/// shorter than the file.
///
/// # Errors
/// Fails only if the file cannot be opened.
fn read_users(path: &str) -> anyhow::Result<Vec<User>> {
    let mut reader = Reader::from_reader(File::open(path)?);

    let mut users = Vec::new();
    for row in reader.deserialize::<User>() {
        // Deliberate best effort: a malformed row is skipped, not reported.
        if let Ok(user) = row {
            users.push(user);
        }
    }
    Ok(users)
}
-
处理大文件(流式处理)
use csv::Reader;
/// Streams the CSV at `path` and returns how many users are 18 or older.
///
/// Rows are deserialized one at a time and never collected. A progress line is
/// printed after every 1000 rows read.
///
/// # Errors
/// Fails if the file cannot be opened or any row does not deserialize into `User`.
fn process_large_csv(path: &str) -> anyhow::Result<u64> {
    let mut reader = Reader::from_path(path)?;
    let mut adults = 0u64;
    let mut processed = 0u64;

    for result in reader.deserialize::<User>() {
        let user = result?;
        processed += 1;

        // A missing age counts as 0, i.e. not an adult.
        if user.age.unwrap_or(0) >= 18 {
            adults += 1;
        }

        // Progress follows rows read, not matches: keyed on the match count it would
        // print "0" for every row until the first adult and repeat while the count stalls.
        if processed % 1000 == 0 {
            println!("已处理 {} 行", processed);
        }
    }
    Ok(adults)
}
3.2 写入 CSV:Writer 与序列化
use csv::Writer;
use serde::Serialize;
/// A product row that serde can serialize.
#[derive(Serialize)]
struct Product {
id: u32,
name: String,
price: f64,
in_stock: bool,
}
fn write_products(path: &str) -> anyhow::Result<()> {
let mut writer = Writer::from_path(path)?;
// 手动写入
writer.write_record(&["id", "name", "price", "in_stock"])?;
writer.write_record(&["1", "Laptop", "999.99", "true"])?;
// 从结构体序列化
let products = vec![
Product { id: 2, name: "Mouse".into(), price: 29.99, in_stock: true },
Product { id: 3, name: "Keyboard".into(), price: 79.99, in_stock: false },
];
for product in products {
writer.serialize(product)?;
}
writer.flush()?; // 确保写入
Ok(())
}
3.3 性能优化技巧
use csv::ReaderBuilder;
/// Reads the CSV at `path` with a tuned reader, reusing one record buffer for all rows.
///
/// `quote_style` is a `WriterBuilder` option and does not exist on `ReaderBuilder`,
/// so it is not set here.
///
/// # Errors
/// Fails if the file cannot be opened or a row cannot be read.
fn optimized_read(path: &str) -> anyhow::Result<()> {
    let mut reader = ReaderBuilder::new()
        .has_headers(true)
        .delimiter(b',') // field delimiter; change it for other separators
        .buffer_capacity(1024 * 1024) // 1 MiB read buffer
        .from_path(path)?;

    // One `StringRecord` is reused for every row, which avoids a fresh allocation per row.
    let mut record = csv::StringRecord::new();
    while reader.read_record(&mut record)? {
        // Process `record` here; its storage is overwritten by the next read.
    }
    Ok(())
}
四、序列化与反序列化:`serde`
4.1 核心派生宏
use serde::{Serialize, Deserialize};
/// Application configuration demonstrating common serde field attributes.
#[derive(Debug, Serialize, Deserialize)]
struct Config {
    pub app_name: String,
    pub version: String,
    /// Falls back to `default_port()` when the field is missing from the input.
    #[serde(default = "default_port")]
    pub port: u16,
    /// Omitted from the output when empty. `default` is the matching half: without it,
    /// a document serialized with no features could not be deserialized again.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub features: Vec<String>,
    /// Never serialized or deserialized; takes its `Default` value when deserializing.
    #[serde(skip)]
    pub runtime_data: Option<String>,
}
/// Port used when the configuration does not specify one.
fn default_port() -> u16 {
    const DEFAULT_PORT: u16 = 8080;
    DEFAULT_PORT
}
4.2 JSON 与 CSV 互转
use csv::Reader;
use serde_json::Value;
fn csv_to_json(csv_path: &str) -> anyhow::Result<String> {
let mut reader = Reader::from_path(csv_path)?;
let headers = reader.headers()?.clone();
let mut records: Vec<Value> = Vec::new();
for result in reader.records() {
let record = result?;
let mut obj = serde_json::Map::new();
for (header, value) in headers.iter().zip(record.iter()) {
obj.insert(header.to_string(), Value::String(value.to_string()));
}
records.push(Value::Object(obj));
}
Ok(serde_json::to_string_pretty(&records)?)
}
4.3 自定义序列化逻辑
use serde::{Serialize, Deserialize, Serializer, Deserializer};
use serde::de::Error;
use chrono::{DateTime, Utc, TimeZone};
/// Newtype around `DateTime<Utc>` with hand-written `Serialize` and `Deserialize` impls.
#[derive(Debug)]
struct Timestamp(DateTime<Utc>);
impl Serialize for Timestamp {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_i64(self.0.timestamp())
}
}
/// Deserializes a timestamp from an `i64` passed to `Utc.timestamp_opt(secs, 0)`.
impl<'de> Deserialize<'de> for Timestamp {
    /// # Errors
    /// Fails when the input is not an `i64` or when it does not map to exactly one
    /// valid UTC instant.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let secs = i64::deserialize(deserializer)?;
        // `ok_or_else` builds the error only on the failure path.
        let moment = Utc
            .timestamp_opt(secs, 0)
            .single()
            .ok_or_else(|| D::Error::custom("无效时间戳"))?;
        Ok(Timestamp(moment))
    }
}
4.4 常用字段注解
| 注解 | 作用 |
|---|---|
| `#[serde(rename = "new_name")]` | 字段重命名 |
| `#[serde(alias = "old_name")]` | 接受多个别名 |
| `#[serde(skip)]` | 跳过序列化和反序列化 |
| `#[serde(skip_serializing)]` | 仅跳过序列化 |
| `#[serde(skip_deserializing)]` | 仅跳过反序列化 |
| `#[serde(default)]` | 缺失时使用 Default::default |
| `#[serde(default = "path")]` | 指定默认函数 |
| `#[serde(flatten)]` | 平铺嵌套结构 |
| `#[serde(untagged)]` | 无标签枚举(根据内容推断;容器级注解,写在 enum 上) |
| `#[serde(tag = "type")]` | 内部标签枚举(容器级注解,写在 enum 上) |
| `#[serde(from = "FromType")]` | 通过 From 转换(容器级注解,写在类型上) |
| `#[serde(into = "IntoType")]` | 通过 Into 转换(容器级注解,写在类型上) |
五、JSON 处理:`serde_json`
5.1 基础操作
use serde_json::{json, Value, Map};
// Build a JSON value in place with the `json!` macro.
let data = json!({
    "name": "Rust",
    "features": ["安全", "并发", "高性能"],
    "version": {
        "major": 1,
        "minor": 75
    }
});
// Serialize to a compact string and to a pretty-printed one.
let json_str = serde_json::to_string(&data)?;
let pretty_json = serde_json::to_string_pretty(&data)?;
// Deserialize into the dynamically typed `Value` and read a field with a fallback.
let parsed: Value = serde_json::from_str(&json_str)?;
let name = parsed["name"].as_str().unwrap_or("unknown");
// Strongly typed deserialization: the document has to match the target struct.
// `json_str` above has no `app_name` and a nested `version`, so `Config` gets its own input.
let config_json = r#"{"app_name": "demo", "version": "1.75.0", "features": []}"#;
let config: Config = serde_json::from_str(config_json)?;
5.2 流式处理(大 JSON)
use serde_json::Deserializer;
use std::fs::File;
use std::io::BufReader;
/// Reads the file at `path` as a stream of top-level JSON values and passes each one
/// to `process_item`.
fn stream_process_json(path: &str) -> anyhow::Result<()> {
let file = File::open(path)?;
let reader = BufReader::new(file);
// Yields each top-level JSON value in the input, one after another.
// NOTE: a file holding one big top-level array is yielded as a single `Value`,
// so its elements are not streamed individually.
let stream = Deserializer::from_reader(reader).into_iter::<Value>();
for value in stream {
let item = value?;
process_item(item)?; // handle one top-level value at a time
}
Ok(())
}
5.3 与文件操作结合
use anyhow::{Context, Result};
use serde::{Serialize, Deserialize};
use std::fs;
use std::path::Path;
/// Application settings stored as JSON; `Default` supplies the values for a new file.
#[derive(Serialize, Deserialize, Default)]
pub struct AppConfig {
pub database_url: String,
pub log_level: String,
pub max_connections: u32,
}
impl AppConfig {
/// 从文件加载,不存在则创建默认配置
pub fn load_or_create<P: AsRef<Path>>(path: P) -> Result<Self> {
let path = path.as_ref();
if path.exists() {
let content = fs::read_to_string(path)
.with_context(|| format!("读取配置失败: {:?}", path))?;
let config: AppConfig = serde_json::from_str(&content)
.context("解析配置 JSON 失败")?;
Ok(config)
} else {
let config = AppConfig::default();
config.save(path)
.with_context(|| format!("创建默认配置失败: {:?}", path))?;
Ok(config)
}
}
pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<()> {
let json = serde_json::to_string_pretty(self)
.context("序列化配置失败")?;
fs::write(path.as_ref(), json)
.with_context(|| format!("写入配置失败: {:?}", path.as_ref()))?;
Ok(())
}
}
六、综合案例:配置文件管理工具
use anyhow::{Context, Result};
use csv::Reader;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
/// User record from the data source (CSV or JSON).
#[derive(Debug, Serialize, Deserialize)]
struct User {
id: u32,
username: String,
email: String,
active: bool,
}
/// Application settings; `Default` yields empty strings and an empty map.
#[derive(Debug, Serialize, Deserialize, Default)]
struct Settings {
output_dir: String,
default_format: String,
mappings: HashMap<String, String>,
}
/// Holds the loaded `Settings` for the CSV-to-JSON conversion.
struct DataProcessor {
settings: Settings,
}
impl DataProcessor {
fn new(config_path: &Path) -> Result<Self> {
let settings = if config_path.exists() {
let content = fs::read_to_string(config_path)
.with_context(|| format!("读取配置: {:?}", config_path))?;
serde_json::from_str(&content)
.context("解析配置")?
} else {
Settings::default()
};
Ok(Self { settings })
}
/// 处理 CSV 并输出 JSON
fn process_csv_to_json(&self, input: &Path, output: &Path) -> Result<usize> {
let mut reader = Reader::from_path(input)
.with_context(|| format!("打开 CSV: {:?}", input))?;
let users: Vec<User> = reader
.deserialize()
.collect::<Result<Vec<_>, _>>()
.context("解析 CSV 记录")?;
let count = users.len();
let json = serde_json::to_string_pretty(&users)
.context("序列化为 JSON")?;
fs::write(output, json)
.with_context(|| format!("写入输出文件: {:?}", output))?;
println!("成功处理 {} 条记录", count);
Ok(count)
}
}
/// Entry point: loads settings from `config.json` and converts `input.csv` to `output.json`.
fn main() -> Result<()> {
    let config_path = Path::new("config.json");
    let (input, output) = (Path::new("input.csv"), Path::new("output.json"));

    let processor = DataProcessor::new(config_path)?;
    processor.process_csv_to_json(input, output)?;
    Ok(())
}
更多推荐


所有评论(0)