php删除文本文件中重复行的方法

本文实例讲述了php删除文本文件中重复行的方法,分享给大家供大家参考,具体分析如下:

下面的php函数用来删除文本文件中的重复行,可以指定是否忽略大小写,也可以指定换行符(行分隔符)。需要注意:处理时每一行首尾的空白会被去掉,空行会被丢弃,并且结果会按行内容排序后再写回文件:

  /**
   * RemoveDuplicatedLines
   * Removes all duplicated lines of the given text file and writes the
   * result back to the same file. Script execution is terminated via die()
   * when the file does not exist, cannot be read or cannot be written.
   *
   * @param string $Filepath   Path of the text file to rewrite in place.
   * @param bool   $IgnoreCase When true, lines differing only in case count as duplicates.
   * @param string $NewLine    Separator that delimits the lines of the file.
   * @return int Number of duplicated lines that were removed.
   */
  function RemoveDuplicatedLines($Filepath, $IgnoreCase=false, $NewLine="\n"){
      if (!file_exists($Filepath)){
          $ErrorMsg = 'RemoveDuplicatedLines error: ';
          $ErrorMsg .= 'The given file ' . $Filepath . ' does not exist!';
          die($ErrorMsg);
      }
      // Is the file writeable? Checked before any work is done.
      if (!is_writeable($Filepath)){
          $ErrorMsg = 'RemoveDuplicatedLines error: ';
          $ErrorMsg .= 'The given file ' . $Filepath . ' is not writeable!';
          die($ErrorMsg);
      }
      // A failed read returns false; continuing would overwrite the file
      // with an empty string, so stop before anything is written.
      $Original = file_get_contents($Filepath);
      if ($Original === false){
          $ErrorMsg = 'RemoveDuplicatedLines error: ';
          $ErrorMsg .= 'The given file ' . $Filepath . ' is not readable!';
          die($ErrorMsg);
      }
      $Content = RemoveDuplicatedLinesByString($Original, $IgnoreCase, $NewLine);
      // Write the new file
      if (file_put_contents($Filepath, $Content) === false){
          $ErrorMsg = 'RemoveDuplicatedLines error: ';
          $ErrorMsg .= 'The given file ' . $Filepath . ' could not be written!';
          die($ErrorMsg);
      }
      // Removed duplicates = non-empty (trimmed) lines before - lines after.
      // Blank lines are dropped by the de-duplication but are not duplicates,
      // so they are excluded from the "before" count.
      $LinesBefore = 0;
      foreach (explode($NewLine, $Original) as $Segment){
          if (trim($Segment) !== '')
              $LinesBefore++;
      }
      $LinesAfter = ($Content === '') ? 0 : count(explode($NewLine, $Content));
      return $LinesBefore - $LinesAfter;
  }
  /**
   * RemoveDuplicatedLinesByString
   * Removes all duplicated lines of the given string (or array of lines).
   * Every line is trimmed, empty lines are dropped, the first occurrence of
   * each line is kept, and the remaining lines are sorted before being
   * joined again with the separator.
   *
   * @param string|array $Lines      Text to process; an array is joined with $NewLine first.
   * @param bool         $IgnoreCase When true, lines differing only in case count as duplicates.
   * @param string       $NewLine    Separator used to split the input and join the result.
   * @return string The unique, sorted lines joined by $NewLine.
   */
  function RemoveDuplicatedLinesByString($Lines, $IgnoreCase=false, $NewLine="\n"){
      // Join arrays first so that elements which themselves contain the
      // separator are split into individual lines below.
      if (is_array($Lines))
          $Lines = implode($NewLine, $Lines);
      $UniqueLines = array();
      // Go through all lines of the given text
      foreach (explode($NewLine, $Lines) as $RawLine){
          // Trim whitespace for the current line
          $CurrentLine = trim($RawLine);
          // Skip empty lines
          if ($CurrentLine === '')
              continue;
          // Use the line contents as array key
          $LineKey = $CurrentLine;
          if ($IgnoreCase){
              // strtolower() works on bytes, so it misses non-ASCII letters.
              // Prefer the multibyte variant when it is available and the
              // line is valid UTF-8; otherwise keep the byte-wise behaviour.
              if (function_exists('mb_strtolower') && mb_check_encoding($LineKey, 'UTF-8'))
                  $LineKey = mb_strtolower($LineKey, 'UTF-8');
              else
                  $LineKey = strtolower($LineKey);
          }
          // Keep only the first occurrence of every key
          if (!isset($UniqueLines[$LineKey]))
              $UniqueLines[$LineKey] = $CurrentLine;
      }
      // Sort the remaining lines by their content
      asort($UniqueLines);
      // Return the de-duplicated text
      return implode($NewLine, array_values($UniqueLines));
  }

使用范例:

  1. // Example 1
  2. // Removes all duplicated lines of the file defined in the first parameter.
  3. $RemovedLinesCount = RemoveDuplicatedLines('test.txt');
  4. print "Removed $RemovedLinesCount duplicate lines from the test.txt file.";
  5. // Example 2 (Ignore case)
  6. // Same as above, just ignores the line case.
  7. RemoveDuplicatedLines('test.txt', true);
  8. // Example 3 (Custom new line character)
  9. // By using the 3rd parameter you can define which character
  10. // should be used as new line indicator. In this case
  11. // the example file looks like 'foo;bar;foo;foo' and will
  12. // be replaced with 'bar;foo' (unique entries, sorted by content).
  13. RemoveDuplicatedLines('test.txt', false, ';');