Class: Figshare::Upload

Inherits:
PrivateArticles show all
Defined in:
lib/upload.rb

Overview

Upload files to figshare. NB: uploads can sometimes fail, so you need to check the md5 to ensure the file got there. It can take a short while for the md5 to be calculated, so upload, wait, then check for a computed_md5. The status will show as “ic_checking”, then “moving_to_final”, then “available”. I have seen it stuck at “moving_to_final”, but with the right computed_md5.

Constant Summary collapse

CHUNK_SIZE =
1048576

Instance Attribute Summary collapse

Attributes inherited from Base

#api_url, #article_index_file, #auth_token, #base_dir, #hostname, #institute_id

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from PrivateArticles

#article_delete, #article_resource, #article_version, #article_version_update_thumbnail, #author_delete, #authors, #authors_add, #authors_replace, #body, #categories, #categories_add, #categories_delete, #categories_replace, #create, #delete_all_files, #detail, #embargo_delete, #embargo_detail, #embargo_update, #file_delete, #file_detail, #files, #link_create, #link_delete, #link_update, #links, #list, #publish, #reserve_doi, #reserve_handle, #search, #update

Methods inherited from Base

#initialize

Constructor Details

This class inherits a constructor from Figshare::Base

Instance Attribute Details

#article_idObject

Returns the value of attribute article_id.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for the @article_id instance variable.
# #upload stores its article_id: argument here, and #status passes it to file_detail.
def article_id
  @article_id
end

#bad_countObject

Returns the value of attribute bad_count.



14
15
16
# File 'lib/upload.rb', line 14

# Reader for the @bad_count instance variable.
# #upload_dir resets it to 0, then increments it each time a file already on
# Figshare is deleted because its MD5 differs from the local file's MD5.
def bad_count
  @bad_count
end

#file_idObject

Returns the value of attribute file_id.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for the @file_id instance variable.
# #upload resets it to nil before starting a new upload, and #status passes it
# to file_detail.
# NOTE(review): presumably assigned while the upload is initiated; that code is
# not visible here, so confirm.
def file_id
  @file_id
end

#file_infoObject

Returns the value of attribute file_info.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for the @file_info instance variable.
# #upload resets it to nil; #status stores the record yielded by file_detail here.
def file_info
  @file_info
end

#file_nameObject

Returns the value of attribute file_name.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for the @file_name instance variable.
# #upload stores its file_name: argument (the local path being uploaded) here.
def file_name
  @file_name
end

#new_countObject

Returns the value of attribute new_count.



14
15
16
# File 'lib/upload.rb', line 14

# Reader for the @new_count instance variable.
# #upload_dir resets it to 0, then increments it after every file it uploads,
# whether the file is brand new or a re-upload of a changed file.
def new_count
  @new_count
end

#upload_hostObject

Returns the value of attribute upload_host.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for the @upload_host instance variable.
# #upload resets it to nil before starting a new upload.
# NOTE(review): the code that assigns a real value is not visible here; confirm
# where it is populated.
def upload_host
  @upload_host
end

#upload_parts_detailObject

Returns the value of attribute upload_parts_detail.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for the @upload_parts_detail instance variable.
# #upload resets it to nil, and prints it (when trace > 1) straight after
# calling get_upload_parts_details.
# NOTE(review): presumably populated by get_upload_parts_details; confirm.
def upload_parts_detail
  @upload_parts_detail
end

#upload_queryObject

Returns the value of attribute upload_query.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for the @upload_query instance variable.
# #upload resets it to nil before starting a new upload.
# NOTE(review): the code that assigns a real value is not visible here; confirm
# where it is populated.
def upload_query
  @upload_query
end

Class Method Details

.get_file_check_data(filename) ⇒ String, Integer

Calculate a local file's MD5 checksum and its size.

Parameters:

  • filename (String)

    Path/name of local file to MD5

Returns:

  • (String, Integer)

    MD5 as a Hex String, Size of the file in bytes.



20
21
22
23
24
25
26
27
28
29
# File 'lib/upload.rb', line 20

# Compute the MD5 checksum and byte size of a local file.
#
# The file is read in binary mode, CHUNK_SIZE bytes at a time, so the whole
# file is never held in memory at once.
#
# @param filename [String] Path/name of the local file
# @return [Array(String, Integer)] MD5 as a hex string, and the file size in
#   bytes as reported by File.stat
def self.get_file_check_data(filename)
  byte_count = File.stat(filename).size
  digest = Digest::MD5.new
  File.open(filename, 'rb') do |io|
    loop do
      chunk = io.read(CHUNK_SIZE)
      break if chunk.nil? # IO#read(length) returns nil at end of file

      digest << chunk
    end
  end
  [digest.hexdigest, byte_count]
end

Instance Method Details

#statusHash

Get status of the current upload. Just fetches the file record from figshare. Of interest is the status field, and the computed_md5 field

Returns:

  • (Hash)

    The Figshare file record is stored in @file_info. If the call fails, a RuntimeError is raised rather than nil being returned.



133
134
135
136
137
138
139
# File 'lib/upload.rb', line 133

# Fetch the current Figshare file record for the upload in progress.
#
# Calls file_detail for @article_id / @file_id and stores the yielded record in
# @file_info. The fields of interest are the status and computed_md5 fields.
#
# @return [Hash] the Figshare file record (the same object as @file_info)
# @raise [RuntimeError] if file_detail did not yield a record
def status
  @file_info = nil
  file_detail(article_id: @article_id, file_id: @file_id) do |f|
    @file_info = f
  end
  raise 'Upload::status(): Failed to get figshare file record' if @file_info.nil?

  # Return the record explicitly: without this the method's value would be the
  # nil produced by the guard clause above.
  @file_info
end

#upload(article_id:, file_name:, trace: 0) ⇒ Object

Upload the file, to the Figshare article

Parameters:

  • article_id (Integer)

    Figshare article id

  • file_name (String)

    path/file_name to upload

  • trace (Integer) (defaults to: 0)

    0: no output, 1: per file upload message, 2: fuller trace



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/upload.rb', line 36

# Upload one local file to a Figshare article.
#
# Runs the upload steps in order: initiate_new_upload, get_file_info,
# get_upload_parts_details, upload_the_parts, complete_upload. With trace > 1
# the intermediate state and the final file status are printed.
#
# @param article_id [Integer] Figshare article id
# @param file_name [String] path/file_name to upload
# @param trace [Integer] 0: no output, 1: per file upload message, 2: fuller trace
def upload(article_id:, file_name:, trace: 0)
  @article_id = article_id
  @file_name = file_name
  @trace = trace

  # Clear any state left over from a previous upload through this object.
  @file_id = @file_info = @upload_query = @upload_host = @upload_parts_detail = nil

  initiate_new_upload
  if @trace > 1
    puts "New File_id: #{@file_id}\n\n"
  end

  get_file_info
  if @trace > 1
    puts "@file_info: #{@file_info.to_j}\n\n"
  end

  get_upload_parts_details
  if @trace > 1
    puts "@upload_parts_detail: #{@upload_parts_detail.to_j}\n\n"
  end

  upload_the_parts
  complete_upload
  return unless @trace > 1

  # Full trace only: re-fetch the file record so the final state can be shown.
  status
  puts "Final Status: #{@file_info.to_j}\n\n"
end

#upload_dir(article_id:, directory:, delete_extras: false, exclude_dot_files: true, trace: 0) ⇒ Object

Upload all files in a directory into one article. Checksums are compared, and only changed or new files are uploaded. Does not recurse through sub-directories, as figshare has a flat file structure.

Parameters:

  • article_id (Integer)

    Figshare article id

  • directory (String)

    path

  • delete_extras (Boolean) (defaults to: false)

    delete any files in the figshare end, that aren’t in the local directory.

  • trace (Integer) (defaults to: 0)

    0: no output, 1: per file upload message, 2: fuller trace



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/upload.rb', line 73

# Upload all files in a directory into one article, skipping files whose MD5
# already matches the copy on Figshare.
#
# Does not recurse into sub-directories (walk_sub_directories: false).
# After the call, @new_count holds the number of files uploaded and @bad_count
# the number of existing Figshare files that were replaced because their MD5
# differed from the local file.
#
# @param article_id [Integer] Figshare article id
# @param directory [String] path of the local directory
# @param delete_extras [Boolean] delete files in the Figshare article that
#   aren't in the local directory
# @param exclude_dot_files [Boolean] skip local files whose name starts with '.'
# @param trace [Integer] 0: no output, 1: per file upload message, 2: fuller trace
def upload_dir(article_id:, directory:, delete_extras: false, exclude_dot_files: true, trace: 0)
  @new_count = 0
  @bad_count = 0

  files = {}
  cache_article_file_md5(article_id: article_id)

  DirR.walk_dir(directory: directory, walk_sub_directories: false) do |d, f|
    next if exclude_dot_files && f.start_with?('.')

    files[f] = true # NOTE: that we have seen this filename
    path = "#{d}/#{f}"
    cached = @md5_cache[f]

    if cached # a file of this name has already been uploaded
      md5, _size = Upload.get_file_check_data(path)
      if cached[:md5] != md5 # file is there, but has changed, or previously failed to upload.
        if trace > 0
          puts "Deleting: #{article_id} << #{path} #{cached[:id]} MISMATCH '#{cached[:md5]}' != '#{md5}'"
        end
        file_delete(article_id: article_id, file_id: cached[:id])
        @bad_count += 1
        puts "Re-ADDING: #{article_id} << #{path}" if trace > 0
        upload(article_id: article_id, file_name: path, trace: trace)
        @new_count += 1
      elsif trace > 1
        puts "EXISTS: #{article_id} #{path}"
      end
    else
      puts "ADDING: #{article_id} << #{path}" if trace > 0
      upload(article_id: article_id, file_name: path, trace: trace)
      @new_count += 1
    end
  end

  # Handle files that exist in the Figshare article but not in the local directory.
  @md5_cache.each do |fn, v|
    next if files[fn]

    if delete_extras
      puts "Deleteing EXTRA: #{article_id} << #{fn} #{v[:id]}" if trace > 0
      # Use this entry's own id; the walk_dir block variable is not in scope here.
      file_delete(article_id: article_id, file_id: v[:id])
    elsif trace > 0
      puts "EXTRA: #{article_id} << #{fn} #{v[:id]}"
    end
  end
end