Oct 21 2008

Retrieve All Documents from SharePoint

I needed a way to quickly retrieve all documents from SharePoint. You can specify which files to retrieve by changing the WHERE clause in the SQL query.  Otherwise, just pass it a path for the output files and set the connection string for your SharePoint content database.

Props to Michael O’Donovan for his helpful article on recovering SharePoint files.

public static void GetDocs(string path)
{
    SqlConnection _conn = new SqlConnection(Settings.Default.conn);
 
    string sql = "SELECT AllDocs.LeafName, AllDocs.DirName, AllDocStreams.[Content] ";
    sql += "FROM AllDocs INNER JOIN AllDocStreams ON AllDocStreams.Id = AllDocs.Id AND AllDocs.[Level] = AllDocStreams.[Level] ";
    sql += "WHERE AllDocs.IsCurrentVersion = 1 AND right(AllDocs.LeafName, 4) in ('.doc','.pdf','.xls','.csv','.ppt','docx','xlsx','pptx','.rtf','.txt','.xps','.mdb','.msg','.wmv','.xsn','.vsd')";
 
    SqlCommand getEmp = new SqlCommand(sql, _conn);
 
    FileStream fs;                          // Writes the BLOB to a file (*.bmp).
    BinaryWriter bw;                        // Streams the BLOB to the FileStream object.
    int bufferSize = 100;                   // Size of the BLOB buffer.
    byte[] outbyte = new byte[bufferSize];  // The BLOB byte[] buffer to be filled by GetBytes.
    long retval;                            // The bytes returned from GetBytes.
    long startIndex = 0;                    // The starting position in the BLOB output.
    string filename = string.Empty;         // The orignial filename.
    string outfile = string.Empty;
    string subpath = string.Empty;
    int i = 0;
 
    // Open the connection and read data into the DataReader.
    _conn.Open();
    SqlDataReader myReader = getEmp.ExecuteReader(CommandBehavior.SequentialAccess);
 
    while (myReader.Read())
    {
        i++;
 
        // Get the filename and build the file path for the output files.
        filename = myReader.GetString(0);
        subpath = myReader.GetString(1).Replace("/", "\\") + "\\";
        outfile = path + subpath + filename;
 
        if (!Directory.Exists(path + subpath))
        {
            Directory.CreateDirectory(path + subpath);
        }
 
        // To prevent overwriting files.
        if (File.Exists(outfile))
        {
            outfile = path + subpath + "_" + i.ToString() + filename;
        }
 
        // Create a file to hold the output.
        fs = new FileStream(outfile, FileMode.Create, FileAccess.Write);
        bw = new BinaryWriter(fs);
 
        // Reset the starting byte for the new BLOB.
        startIndex = 0;
 
        // Read the bytes into outbyte[] and retain the number of bytes returned.
        retval = myReader.GetBytes(2, startIndex, outbyte, 0, bufferSize);
 
        // Output status messages
        string msg = String.Format("{0} ", subpath + filename); 
        Console.Write(Right(msg,48));
 
        while (retval == bufferSize)
        {
            bw.Write(outbyte);
            bw.Flush();
 
            // Reposition the start index to the end of the last buffer and fill the buffer.
            startIndex += bufferSize;
            retval = myReader.GetBytes(2, startIndex, outbyte, 0, bufferSize);
        }
 
        // Write the remaining buffer.
        bw.Write(outbyte, 0, (int)retval);
        bw.Flush();
 
        // Close the output file.
        bw.Close();
        fs.Close();
        Console.Write(" [Complete]");
        Console.WriteLine();
    }
 
    // Close the reader and the connection.
    myReader.Close();
    _conn.Close();
}