文本分割器TXTSpliter
/*
 * TXTSpliterEqualBytes - text-file splitter (equal byte counts).
 *
 * Background: when a downloaded log (plain-text) file is tens of megabytes in size, opening it
 * directly in a text editor such as Notepad++ can freeze the editor. This tool cuts the file
 * into N pieces of equal byte size (for example, a 50 MB file into ten 5 MB files).
 *
 * Download: https://download.csdn.net/download/shushanke/86923522
 *
 * Build and run:
 *   javac -d . -encoding UTF-8 TXTSpliterEqualBytes.java
 *   java TXTSpliterEqualBytes
 *
 * Caveat: the cut positions are byte offsets, so a multi-byte character that straddles a cut
 * ends up half in one part and half in the next, and shows as garbage at the part edges.
 */
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

/**
 * Splits a text file into N parts of (almost) equal byte size.
 *
 * <p>Parts are written next to the source file and named
 * {@code <base>_<serial><suffix>}, where the serial number is zero-padded to the number of
 * digits in N. Any remainder bytes go into the last part.
 */
public class TXTSpliterEqualBytes {

    /** Case-sensitive file suffixes treated as text files; order is kept for the error message. */
    private static final Set<String> TEXT_SUFFIXES = new LinkedHashSet<String>(
            Arrays.asList(".log", ".LOG", ".txt", ".TXT", ".text", ".TEXT"));

    /** Number of parts used by the no-argument {@link #split()}; updated from console input. */
    private static int numberOfFiles = 10;

    /** Text file discovered in the working directory; looked up lazily and then cached. */
    private static File sourceFile;

    /**
     * Returns the first regular file in the working directory whose suffix is a known text
     * suffix. The lookup is deferred until first use so that merely loading the class performs
     * no I/O and cannot fail.
     *
     * @return the file to split
     * @throws RuntimeException if the working directory holds no matching file
     */
    private static File findTXTFile() {
        if (sourceFile != null) {
            return sourceFile;
        }
        File workingDir = new File("").getAbsoluteFile();
        File[] entries = workingDir.listFiles();
        // listFiles() yields null when the directory cannot be read; treat that as "nothing found".
        if (entries != null) {
            for (File entry : entries) {
                if (!entry.isFile()) {
                    continue;
                }
                String name = entry.getName();
                int dot = name.lastIndexOf('.');
                // dot < 1 means "no suffix" or a dot-file such as ".txt"; neither is a candidate.
                if (dot >= 1 && TEXT_SUFFIXES.contains(name.substring(dot))) {
                    sourceFile = entry;
                    return entry;
                }
            }
        }
        String tipMsg = "ERROR:請將待分割的文本文件" + TEXT_SUFFIXES.toString() + "放到當前目錄下!";
        System.out.println(tipMsg);
        throw new RuntimeException(tipMsg);
    }

    /**
     * Closes {@code closeable} if it is non-null, printing (not propagating) any failure.
     *
     * @param closeable resource to close; may be {@code null}
     */
    public static void closeCloseable(Closeable closeable) {
        try {
            if (closeable != null) {
                closeable.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Computes the byte length of every part: {@code total / parts} each, with the remainder
     * added to the last part.
     *
     * @param total total number of bytes to distribute
     * @param parts number of parts; must be positive
     * @return an array of {@code parts} lengths whose sum is {@code total}
     */
    static long[] chunkSizes(long total, int parts) {
        long[] sizes = new long[parts];
        long each = total / parts;
        for (int i = 0; i < parts; i++) {
            sizes[i] = each;
        }
        sizes[parts - 1] += total % parts;
        return sizes;
    }

    /** Builds a zero-padding formatter as wide as the decimal representation of {@code parts}. */
    private static DecimalFormat serialFormat(int parts) {
        StringBuilder pattern = new StringBuilder();
        for (int n = parts; n > 0; n /= 10) {
            pattern.append('0');
        }
        return new DecimalFormat(pattern.toString());
    }

    /** Copies exactly {@code count} bytes starting at {@code position} from {@code in} to {@code out}. */
    private static void copyRange(FileChannel in, long position, long count, FileChannel out)
            throws IOException {
        long from = position;
        long remaining = count;
        // transferTo may move fewer bytes than requested, so keep going until the range is done.
        while (remaining > 0) {
            long moved = in.transferTo(from, remaining, out);
            if (moved <= 0) {
                throw new IOException("source file ended before byte offset " + (from + remaining));
            }
            from += moved;
            remaining -= moved;
        }
    }

    /**
     * Splits the text file found in the working directory into the currently configured number
     * of parts.
     *
     * @return {@code true} if every part was written, {@code false} otherwise
     * @throws RuntimeException if the working directory holds no text file
     */
    public static boolean split() {
        if (numberOfFiles < 2) {
            System.out.println("分割后的文件個數不能小于2!");
            return false;
        }
        return split(findTXTFile(), numberOfFiles);
    }

    /**
     * Splits {@code source} into {@code parts} files of equal byte size, written to the same
     * directory as {@code source}.
     *
     * @param source file to split; must not be {@code null}
     * @param parts number of parts; values below 2 are rejected
     * @return {@code true} if every part was written; {@code false} if {@code parts} is below 2
     *         or an I/O error occurred (the error is printed)
     * @throws IllegalArgumentException if {@code source} is {@code null}
     */
    public static boolean split(File source, int parts) {
        if (source == null) {
            throw new IllegalArgumentException("source must not be null");
        }
        if (parts < 2) {
            System.out.println("分割后的文件個數不能小于2!");
            return false;
        }
        String name = source.getName();
        int dot = name.lastIndexOf('.');
        String baseName = dot > 0 ? name.substring(0, dot) : name;
        String suffix = dot > 0 ? name.substring(dot) : "";
        File targetDir = source.getAbsoluteFile().getParentFile();
        DecimalFormat serial = serialFormat(parts);

        try (FileInputStream in = new FileInputStream(source);
                FileChannel inChannel = in.getChannel()) {
            long[] sizes = chunkSizes(inChannel.size(), parts);
            long offset = 0;
            for (int i = 0; i < parts; i++) {
                File part = new File(targetDir, baseName + "_" + serial.format(i + 1) + suffix);
                // One try-with-resources per part: each output file is closed before the next opens.
                try (FileOutputStream out = new FileOutputStream(part);
                        FileChannel outChannel = out.getChannel()) {
                    copyRange(inChannel, offset, sizes[i], outChannel);
                }
                offset += sizes[i];
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Console entry point: reads lines from standard input until {@code exit} is entered or a
     * split succeeds. An integer greater than 1 is taken as the number of parts.
     *
     * @param args ignored
     * @throws Exception if the working directory holds no text file
     */
    public static void main(String... args) throws Exception {
        // Fail before prompting when there is nothing to split.
        findTXTFile();
        System.out.println("①輸入exit并敲回車,結束程序。");
        System.out.println("②輸入大于1的整數(N)并敲回車,將文本分割成N分。");
        try (BufferedReader bufReader = new BufferedReader(new InputStreamReader(System.in))) {
            String line;
            while ((line = bufReader.readLine()) != null) {
                System.out.println("本次輸入的內容是:" + line);
                String input = line.trim();
                if (input.equalsIgnoreCase("exit")) {
                    break;
                }
                int count;
                try {
                    count = Integer.parseInt(input);
                } catch (NumberFormatException e) {
                    System.out.println("請輸入大于1的整數:");
                    continue;
                }
                if (count < 2) {
                    System.out.println("請輸入大于1的整數:");
                    continue;
                }
                numberOfFiles = count;
                System.out.println("文本將分割成" + numberOfFiles + "份");
                long start = System.currentTimeMillis();
                boolean success = split();
                long end = System.currentTimeMillis();
                if (success) {
                    System.out.println("文本分割已完成,耗時(ms)=" + (end - start));
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/*
 * TXTSpliterEqualChars - text-file splitter (equal character counts).
 *
 * Build and run:
 *   javac -d . -encoding UTF-8 TXTSpliterEqualChars.java
 *   java TXTSpliterEqualChars
 */
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.file.Files;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

/**
 * Splits a text file into N parts holding (almost) the same number of characters.
 *
 * <p>The file is decoded with {@link #CHARSET_UTF8}, cut on character boundaries and each piece
 * is re-encoded with the same charset, so no part starts or ends in the middle of a character.
 * Parts are written next to the source file and named {@code <base>_<serial><suffix>}, where
 * the serial number is zero-padded to the number of digits in N. The whole file is held in
 * memory while it is split.
 */
public class TXTSpliterEqualChars {

    /** UTF-8 charset; used to build the decoder and encoder in {@link #split(File, int)}. */
    public static Charset CHARSET_UTF8 = Charset.forName("UTF-8");

    /** GBK charset; not referenced by this class itself. */
    public static Charset CHARSET_GBK = Charset.forName("GBK");

    /** Case-sensitive file suffixes treated as text files; order is kept for the error message. */
    private static final Set<String> TEXT_SUFFIXES = new LinkedHashSet<String>(
            Arrays.asList(".log", ".LOG", ".txt", ".TXT", ".text", ".TEXT"));

    /** Number of parts used by the no-argument {@link #split()}; updated from console input. */
    private static int numberOfFiles = 10;

    /** Text file discovered in the working directory; looked up lazily and then cached. */
    private static File sourceFile;

    /**
     * Returns the first regular file in the working directory whose suffix is a known text
     * suffix. The lookup is deferred until first use so that merely loading the class performs
     * no directory I/O.
     *
     * @return the file to split
     * @throws RuntimeException if the working directory holds no matching file
     */
    private static File findTXTFile() {
        if (sourceFile != null) {
            return sourceFile;
        }
        File workingDir = new File("").getAbsoluteFile();
        File[] entries = workingDir.listFiles();
        // listFiles() yields null when the directory cannot be read; treat that as "nothing found".
        if (entries != null) {
            for (File entry : entries) {
                if (!entry.isFile()) {
                    continue;
                }
                String name = entry.getName();
                int dot = name.lastIndexOf('.');
                // dot < 1 means "no suffix" or a dot-file such as ".txt"; neither is a candidate.
                if (dot >= 1 && TEXT_SUFFIXES.contains(name.substring(dot))) {
                    sourceFile = entry;
                    return entry;
                }
            }
        }
        String tipMsg = "ERROR:請將待分割的文本文件" + TEXT_SUFFIXES.toString() + "放到當前目錄下!";
        System.out.println(tipMsg);
        throw new RuntimeException(tipMsg);
    }

    /**
     * Closes {@code closeable} if it is non-null, printing (not propagating) any failure.
     *
     * @param closeable resource to close; may be {@code null}
     */
    public static void closeCloseable(Closeable closeable) {
        try {
            if (closeable != null) {
                closeable.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Computes the cut positions for splitting {@code text} into {@code parts} pieces of
     * {@code length / parts} chars each, with the remainder going to the last piece.
     *
     * <p>A cut that would fall between the two halves of a surrogate pair is moved one char
     * forward, keeping the pair together in the earlier piece; a lone surrogate cannot be
     * encoded and would make the whole split fail.
     *
     * @param text decoded text to split
     * @param parts number of pieces; must be positive
     * @return {@code parts + 1} non-decreasing indices; piece {@code i} is
     *         {@code [result[i], result[i + 1])}
     */
    static int[] splitPoints(CharSequence text, int parts) {
        int total = text.length();
        int each = total / parts;
        int[] bounds = new int[parts + 1];
        for (int i = 1; i < parts; i++) {
            int cut = i * each;
            if (cut > 0 && cut < total
                    && Character.isHighSurrogate(text.charAt(cut - 1))
                    && Character.isLowSurrogate(text.charAt(cut))) {
                cut++;
            }
            bounds[i] = cut;
        }
        bounds[parts] = total;
        return bounds;
    }

    /** Builds a zero-padding formatter as wide as the decimal representation of {@code parts}. */
    private static DecimalFormat serialFormat(int parts) {
        StringBuilder pattern = new StringBuilder();
        for (int n = parts; n > 0; n /= 10) {
            pattern.append('0');
        }
        return new DecimalFormat(pattern.toString());
    }

    /**
     * Splits the text file found in the working directory into the currently configured number
     * of parts.
     *
     * @return {@code true} if every part was written, {@code false} otherwise
     * @throws RuntimeException if the working directory holds no text file
     */
    public static boolean split() {
        if (numberOfFiles < 2) {
            System.out.println("分割后的文件個數不能小于2!");
            return false;
        }
        return split(findTXTFile(), numberOfFiles);
    }

    /**
     * Splits {@code source} into {@code parts} files with equal character counts, written to
     * the same directory as {@code source}.
     *
     * @param source file to split; must not be {@code null} and must be valid text in
     *        {@link #CHARSET_UTF8}
     * @param parts number of parts; values below 2 are rejected
     * @return {@code true} if every part was written; {@code false} if {@code parts} is below
     *         2, the content cannot be decoded, or an I/O error occurred (the error is printed)
     * @throws IllegalArgumentException if {@code source} is {@code null}
     */
    public static boolean split(File source, int parts) {
        if (source == null) {
            throw new IllegalArgumentException("source must not be null");
        }
        if (parts < 2) {
            System.out.println("分割后的文件個數不能小于2!");
            return false;
        }
        String name = source.getName();
        int dot = name.lastIndexOf('.');
        String baseName = dot > 0 ? name.substring(0, dot) : name;
        String suffix = dot > 0 ? name.substring(dot) : "";
        File targetDir = source.getAbsoluteFile().getParentFile();
        DecimalFormat serial = serialFormat(parts);

        try {
            byte[] raw = Files.readAllBytes(source.toPath());
            // A fresh decoder reports malformed input instead of silently replacing it.
            CharsetDecoder decoder = CHARSET_UTF8.newDecoder();
            CharBuffer text = decoder.decode(ByteBuffer.wrap(raw));
            int[] bounds = splitPoints(text, parts);
            int charNumTotal = text.length();
            System.out.println("byteNumTotal=" + raw.length);
            System.out.println("charNumTotal=" + charNumTotal + ", charNumEach="
                    + (charNumTotal / parts) + ", charRemainder=" + (charNumTotal % parts));

            CharsetEncoder encoder = CHARSET_UTF8.newEncoder();
            for (int i = 0; i < parts; i++) {
                int from = bounds[i];
                int to = bounds[i + 1];
                System.out.println("from " + from + " to " + to + ", charNum=" + (to - from));
                // A duplicate shares the decoded chars but has its own position/limit window.
                CharBuffer piece = text.duplicate();
                piece.limit(to);
                piece.position(from);
                ByteBuffer encoded = encoder.encode(piece);

                File part = new File(targetDir, baseName + "_" + serial.format(i + 1) + suffix);
                // One try-with-resources per part: each output file is closed before the next opens.
                try (FileOutputStream out = new FileOutputStream(part);
                        FileChannel outChannel = out.getChannel()) {
                    // A single write is not guaranteed to drain the buffer.
                    while (encoded.hasRemaining()) {
                        outChannel.write(encoded);
                    }
                }
            }
            return true;
        } catch (IOException e) {
            // Also covers CharacterCodingException raised by the decoder or encoder.
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Console entry point: reads lines from standard input until {@code exit} is entered or a
     * split succeeds. An integer greater than 1 is taken as the number of parts.
     *
     * @param args ignored
     * @throws Exception if the working directory holds no text file
     */
    public static void main(String... args) throws Exception {
        // Fail before prompting when there is nothing to split.
        findTXTFile();
        System.out.println("①輸入exit并敲回車,結束程序。");
        System.out.println("②輸入大于1的整數(N)并敲回車,將文本分割成N分。");
        try (BufferedReader bufReader = new BufferedReader(new InputStreamReader(System.in))) {
            String line;
            while ((line = bufReader.readLine()) != null) {
                System.out.println("本次輸入的內容是:" + line);
                String input = line.trim();
                if (input.equalsIgnoreCase("exit")) {
                    break;
                }
                int count;
                try {
                    count = Integer.parseInt(input);
                } catch (NumberFormatException e) {
                    System.out.println("請輸入大于1的整數:");
                    continue;
                }
                if (count < 2) {
                    System.out.println("請輸入大于1的整數:");
                    continue;
                }
                numberOfFiles = count;
                System.out.println("文本將分割成" + numberOfFiles + "份");
                long start = System.currentTimeMillis();
                boolean success = split();
                long end = System.currentTimeMillis();
                if (success) {
                    System.out.println("文本分割已完成,耗時(ms)=" + (end - start));
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

/*
 * Runtime environment: JDK 1.7 or 1.8.
 *
 * Windows launchers (.bat files), placed in the same directory as the compiled classes:
 *
 *   TXTSpliterEqualChars.bat
 *     java TXTSpliterEqualChars
 *     :pause
 *
 *   TXTSpliterEqualBytes.bat
 *     java TXTSpliterEqualBytes
 *     :pause
 *
 * Put the text file to be split into that same directory, then double-click the launcher.
 */
總結
以上是生活随笔為你收集整理的文本分割器TXTSpliter的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: html自动滑动轮播代码,html+cs
- 下一篇: 一台设备驱动万物:苹果和三星即将推陈出新