Basic HDFS File Operations with the Java API

algorain


This post is mostly code. It covers uploading, downloading, and deleting files, creating directories, and listing directories from Java. The code was written on macOS with IDEA, using Maven to manage the dependencies; the full source and pom.xml are given below. The Hadoop version is 2.6 (the CDH build). In the code, hmaster is the hostname I configured for my virtual machine's IP address; change it to the address of your own virtual machine.

HDFSUtil.java

package com.rain.hdfs;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Before;
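import org.junit.After; // needed only for the cleanup method added below (not part of the original sample)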
import org.junit.Test;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

public class HDFSUtil {

    FileSystem fs = null;

    // Runs before each test method, so the configuration is in place
    // before any of the other functions execute.
    @Before
    public void init() throws Exception {

        // Reads the xxx-site.xml configuration files on the classpath,
        // parses them, and loads their values into the conf object.
        // You can also copy core-site.xml into the project.
        Configuration conf = new Configuration();

        // Values set in code override the values read from the configuration files.
        conf.set("fs.defaultFS", "hdfs://hmaster:9000/");

        // Obtain a client instance for the configured file system,
        // connecting as the "hadoop" user.
        fs = FileSystem.get(new URI("hdfs://hmaster:9000/"), conf, "hadoop");
    }
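
    /**
     * Runs after each test. This method is not part of the original sample;
     * it is a small addition so the FileSystem handle and its client
     * connection are released once a test finishes.
     */
    @After
    public void cleanup() throws IOException {
        if (fs != null) {
            fs.close();
        }
    }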



    /**
     * Upload a file using the lower-level stream API.
     *
     * @throws Exception
     */
    @Test
    public void upload_old() throws Exception {

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hmaster:9000/");

        FileSystem fs = FileSystem.get(conf);

        Path dst = new Path("hdfs://hmaster:9000/data/sample.txt");

        FSDataOutputStream os = fs.create(dst);

        FileInputStream is = new FileInputStream("/Users/rain/Downloads/sample.txt");

        IOUtils.copy(is, os);

        // Close the streams so the data is actually flushed to HDFS.
        is.close();
        os.close();
    }
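
    /**
     * Download a file with the same lower-level stream API, for symmetry with
     * upload_old. This method is not in the original sample; it is a minimal
     * sketch that assumes /data/sample.txt already exists in HDFS.
     *
     * @throws Exception
     */
    @Test
    public void download_old() throws Exception {

        // fs.open returns an FSDataInputStream for reading the HDFS file.
        FSDataInputStream is = fs.open(new Path("/data/sample.txt"));

        FileOutputStream os = new FileOutputStream("/Users/rain/Downloads/sample_copy.txt");

        IOUtils.copy(is, os);

        is.close();
        os.close();
    }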

    /**
     * Upload a file using the higher-level convenience API.
     *
     * @throws Exception
     */
    @Test
    public void upload() throws Exception {

        // The first argument is the local source path; the second is the
        // destination path (and new file name) in HDFS.
        fs.copyFromLocalFile(new Path("/Users/rain/Downloads/sample.txt"), new Path("hdfs://hmaster:9000/data/test.txt"));
    }


    /**
     * Download a file from HDFS to the local file system.
     *
     * @throws Exception
     */
    @Test
    public void download() throws Exception {

        // The first argument is the HDFS source path; the second is the
        // local destination path and file name.
        fs.copyToLocalFile(new Path("hdfs://hmaster:9000/sample.txt"), new Path("/Users/rain/Downloads/sample.txt"));
    }
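
    /**
     * Variant of download for machines without the native Hadoop libraries
     * (a common situation on macOS). Not in the original sample; a minimal
     * sketch using the copyToLocalFile overload that takes a
     * useRawLocalFileSystem flag, which skips the native checksum code path.
     *
     * @throws Exception
     */
    @Test
    public void downloadRaw() throws Exception {

        // delSrc=false keeps the HDFS copy; useRawLocalFileSystem=true writes
        // through the raw local file system (no .crc checksum file).
        fs.copyToLocalFile(false, new Path("hdfs://hmaster:9000/sample.txt"),
                new Path("/Users/rain/Downloads/sample.txt"), true);
    }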

    /**
     * List file information.
     *
     * @throws IOException
     * @throws IllegalArgumentException
     * @throws FileNotFoundException
     */
    @Test
    public void listFiles() throws FileNotFoundException, IllegalArgumentException, IOException {

        // listFiles returns file entries only, and supports recursive traversal.
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/data"), true);

        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            Path filePath = file.getPath();
            String fileName = filePath.getName();
            System.out.println(fileName);
        }

        System.out.println("---------------------------------");

        // listStatus returns both files and directories, but does not
        // traverse recursively on its own.
        FileStatus[] listStatus = fs.listStatus(new Path("/data"));
        for (FileStatus status : listStatus) {
            String name = status.getPath().getName();
            System.out.println(name + (status.isDirectory() ? " is dir" : " is file"));
        }
    }
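
    /**
     * Since listStatus does not recurse on its own, this is one way to walk a
     * directory tree with it. The helper is not in the original sample; it is
     * a minimal sketch of manual recursion over FileStatus entries.
     */
    private void listStatusRecursive(Path dir) throws IOException {
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println(status.getPath() + (status.isDirectory() ? " is dir" : " is file"));
            if (status.isDirectory()) {
                // Descend into subdirectories to emulate recursive traversal.
                listStatusRecursive(status.getPath());
            }
        }
    }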

    /**
     * Create a directory.
     *
     * @throws Exception
     */
    @Test
    public void mkdir() throws Exception {

        // mkdirs creates the directory along with any missing parents.
        fs.mkdirs(new Path("/data/test"));
    }

    /**
     * Delete a file or directory.
     *
     * @throws IOException
     */
    @Test
    public void rm() throws IOException {

        // The second argument enables recursive deletion, which is required
        // for non-empty directories.
        fs.delete(new Path("/data/test"), true);
    }
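
    /**
     * Rename (move) a file or directory. Not in the original sample; a
     * minimal sketch using fs.rename, assuming /data/test.txt exists.
     *
     * @throws IOException
     */
    @Test
    public void rename() throws IOException {

        // rename returns false instead of throwing if the operation fails.
        boolean ok = fs.rename(new Path("/data/test.txt"), new Path("/data/renamed.txt"));
        System.out.println("rename succeeded: " + ok);
    }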


    public static void main(String[] args) throws Exception {

    }

}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.rain.hadoop</groupId>
    <artifactId>rain-hadoop</artifactId>
    <version>1.0-SNAPSHOT</version>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>6</source>
                    <target>6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
    </properties>

    <repositories>
        <!-- The CDH artifacts are hosted in Cloudera's repository, not Maven Central. -->
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- Dependencies -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.0-cdh5.7.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.0-cdh5.7.0</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

</project>
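
With this pom.xml in place, the test methods can be run directly from IDEA. One caveat: Maven's Surefire plugin only runs test classes whose names match its default patterns (for example, names ending in Test), so running mvn test from the command line will not pick up HDFSUtil unless the class is renamed or the include patterns are adjusted.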
  • Title: Basic HDFS File Operations with the Java API
  • Author: algorain
  • Created at: 2018-08-12 17:32:47
  • Updated at: 2023-05-14 21:39:50
  • Link: http://www.rain1024.com/2018/08/12/article133/
  • License: This work is licensed under CC BY-NC-SA 4.0.