Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
.bundle/
.idea
benchmark/
lib/linguist/samples.json
lib/linguist/samples_data.rb

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NB: since the code is pretty-printed, it could make sense to commit it, with some tests to ensure it's up to date.

/grammars
/node_modules
test/fixtures/ace_modes.json
Expand Down
12 changes: 5 additions & 7 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ require 'rake/clean'
require 'rake/testtask'
require 'rake/extensiontask'
require 'yaml'
require 'yajl'
require 'open-uri'
require 'json'
require 'open3'

task :default => :test
Expand All @@ -23,7 +21,7 @@ task :test => [:compile, :check_samples, :fetch_ace_modes]

desc "Check that we have samples.json generated"
task :check_samples do
unless File.exist?('lib/linguist/samples.json')
unless File.exist?('lib/linguist/samples_data.rb')
Rake::Task[:samples].invoke
end
end
Expand All @@ -45,8 +43,8 @@ end

task :samples => :compile do
require 'linguist/samples'
json = Yajl.dump(Linguist::Samples.data, :pretty => false)
File.write 'lib/linguist/samples.json', json
require 'pp'
File.write 'lib/linguist/samples_data.rb', "#frozen_string_literal: true\nDATA = #{PP.pp(Linguist::Samples.data, +'')}"
end

task :flex do
Expand Down Expand Up @@ -94,9 +92,9 @@ task :build_gem => :samples do
rm_rf "grammars"
sh "script/grammar-compiler compile -o grammars || true"
languages = YAML.load_file("lib/linguist/languages.yml")
File.write("lib/linguist/languages.json", Yajl.dump(languages))
File.write("lib/linguist/languages_data.rb", "#frozen_string_literal: true\nDATA = #{PP.pp(languages, +'')}")
`gem build github-linguist.gemspec`
File.delete("lib/linguist/languages.json")
File.delete("lib/linguist/languages_data.rb")
end

namespace :benchmark do
Expand Down
14 changes: 5 additions & 9 deletions lib/linguist/language.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
require 'cgi'
require 'yaml'
begin
require 'yajl'
rescue LoadError
require 'json'
end

require 'linguist/classifier'
require 'linguist/heuristics'
Expand Down Expand Up @@ -501,11 +496,12 @@ def inspect
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))

languages_yml = File.expand_path("../languages.yml", __FILE__)
languages_json = File.expand_path("../languages.json", __FILE__)
languages_rb = File.expand_path("../languages_data.rb", __FILE__)

if File.exist?(languages_json)
serializer = defined?(Yajl) ? Yajl : JSON
languages = serializer.load(File.read(languages_json))
if File.exist?(languages_rb)
mod = Module.new
load(languages_rb, mod)
languages = mod::DATA
else
languages = YAML.load_file(languages_yml)
end
Expand Down
14 changes: 4 additions & 10 deletions lib/linguist/samples.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ module Samples
ROOT = File.expand_path("../../../samples", __FILE__)

# Path for serialized samples db
PATH = File.expand_path('../samples.json', __FILE__)
PATH = File.expand_path('../samples_data.rb', __FILE__)

# Hash of serialized samples object, cached in memory
def self.cache
Expand All @@ -24,15 +24,9 @@ def self.cache

# Hash of serialized samples object, uncached
def self.load_samples
serializer = defined?(Yajl) ? Yajl : JSON
data = serializer.load(File.read(PATH, encoding: 'utf-8'))
# JSON serialization does not allow integer keys, we fix them here
for lang in data['centroids'].keys
fixed = data['centroids'][lang].to_a.map { |k,v| [k.to_i, v] }
data['centroids'][lang] = Hash[fixed]
end
Comment on lines -27 to -33

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This integer-key fix-up is no longer needed now that the data is loaded as Ruby source rather than JSON, and it accounted for about 5% of load time.


data
mod = Module.new
load(PATH, mod)
mod::DATA
end

# Public: Iterate over each sample.
Expand Down