diff --git a/.gitignore b/.gitignore index 63123fb..b7cd941 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .DS_store +.idea \ No newline at end of file diff --git a/step_0.txt b/step_0.txt new file mode 100644 index 0000000..6835b56 --- /dev/null +++ b/step_0.txt @@ -0,0 +1,8 @@ +1. What _things_ (objects, nouns) are represented or described in this file? We can think of at least six different things. +A. Driver, Date, Cost, Rider, Rating, One Ride. + +2. From the things you listed in the previous question, all of those things have relationships to each other. (An ID belongs to a person, for instance. As an abstract, unrelated example, a VIN belongs to a vehicle, and a vehicle has a VIN.) Consider the relationships between the pieces of data. +A. All of the information combines to form the data for one ride. You can find the total or average number of rides for a driver or rider. You can find the average rating a rider gives or the average rating a driver gets. You can see this not just for one driver or rider, but for all as a grouped average. You can count how many rides there were on a given day, month, or year. You may be able to determine the possible cause of a high or low cost - distance, traffic / time of day, or a split cost (if ride sharing is an option) - but that would need more data, such as a timestamp. You could see if there is a link between cost and ratings. You can find rider trends in travel cost - are there more low-cost short trips or high-cost long trips (although this may also need more data, such as start and stop locations)? + +3. Lastly, in this assignment, we will rearrange all of the data into one data structure (with a lot of nested layers) that can be held in one variable. List some ideas: considering all of the relationships listed in the last question, what piece of data can contain the others at the top-most level? (Compared to the JSON example before, think about what the top-most layer of the hash was and what it represented.) 
There is more than one correct answer, so just list out the options at this moment. +A. An array can hold the other data at the top-most level, with each hash representing a ride. Since each row represents a ride, I think it best to maintain that structure as much as possible. If I think about the things that I want to get from the data, or what this data may be used to analyse, I still think using the table/column headings as keys in each ride hash is wise. If more columns are added in the future to collect more data, it will be easier to add the column to the ride hashes and the data of that column by following the same pattern. \ No newline at end of file diff --git a/worksheet.rb b/worksheet.rb index 95b085d..fe1747d 100644 --- a/worksheet.rb +++ b/worksheet.rb @@ -1,17 +1,31 @@ +require 'date' ######################################################## # Step 1: Establish the layers # In this section of the file, as a series of comments, # create a list of the layers you identify. + # Layer 1 - Array of ride hashes + # Layer 2 - DRIVER_ID, DATE, COST, RIDER_ID, RATING + # Layer 3 - Day, Month, Year + # Which layers are nested in each other? + # Layer 3 is nested in Date of Layer 2, and Layer 2 is nested in Layer 1 + # Which layers of data "have" within it a different layer? + # Date of Layer 2 + # Which layers are "next" to each other? + # All of the column headings ######################################################## # Step 2: Assign a data structure to each layer # Copy your list from above, and in this section # determine what data structure each layer should have + # Layer 1 - Array of ride hashes + # Layer 2 - Hash with keys for DRIVER_ID, DATE, COST, RIDER_ID, RATING + # Layer 3 - Array of ints for Day, Month, Year + ######################################################## # Step 3: Make the data structure! 
# into this data structure, such as "DR0004"
# and "3rd Feb 2016" and "RD0022"

# data structure blueprint
# [
#   {
#     driver_id: str,
#     date: [int, int, int],   # [day, month, year]
#     cost: int,
#     rider_id: str,
#     rating: int
#   }
# ]

# Raw ride table: the first row holds the column headings,
# every following row describes one ride (all values are strings).
rides_data = [
  ['DRIVER_ID', 'DATE', 'COST', 'RIDER_ID', 'RATING'],
  ['DR0004', '3rd Feb 2016', '5', 'RD0022', '5'],
  ['DR0001', '3rd Feb 2016', '10', 'RD0003', '3'],
  ['DR0002', '3rd Feb 2016', '25', 'RD0073', '5'],
  ['DR0001', '3rd Feb 2016', '30', 'RD0015', '4'],
  ['DR0003', '4th Feb 2016', '5', 'RD0066', '5'],
  ['DR0004', '4th Feb 2016', '10', 'RD0022', '4'],
  ['DR0002', '4th Feb 2016', '15', 'RD0013', '1'],
  ['DR0003', '5th Feb 2016', '50', 'RD0003', '2'],
  ['DR0002', '5th Feb 2016', '35', 'RD0066', '3'],
  ['DR0004', '5th Feb 2016', '20', 'RD0073', '5'],
  ['DR0001', '5th Feb 2016', '45', 'RD0003', '2']
]

# Print a horizontal rule that separates the sections of the report.
def line_break
  puts '--------------------------------------------------------'
end

# Return an array with the integer representation of a date.
#
# date_string - a date in any format Date.parse understands, e.g. "3rd Feb 2016"
#
# Returns [day, month, year] as integers.
def parse_date(date_string)
  date = Date.parse(date_string)
  [date.mday, date.mon, date.year]
end

# Create the data structure from the blueprint.
#
# data - array of rows; the first row holds the column headings, the rest are
#        rides whose columns are read by position:
#        driver id, date, cost, rider id, rating
#
# Returns an array with one hash per ride, keyed by the downcased, symbolized
# headings. Returns [] when data is empty.
def structure_ride_share(data)
  return [] if data.empty?

  # hash keys are derived from the column headings
  driver_key, date_key, cost_key, rider_key, rating_key =
    data.first.map { |heading| heading.downcase.to_sym }

  # skip the heading row, then convert every remaining row into a ride hash
  data.drop(1).map do |row|
    {
      driver_key => row[0],
      date_key => parse_date(row[1]),
      cost_key => row[2].to_i,
      rider_key => row[3],
      rating_key => row[4].to_i
    }
  end
end

ride_share_data = structure_ride_share(rides_data)
line_break
puts 'Ride Share Data:'
line_break
pp ride_share_data

########################################################
# Step 4: Total Driver's Earnings and Number of Rides
# Use an iteration blocks to print the following answers:

# Return the distinct values stored under value_type (e.g. :driver_id)
# across all ride hashes, in order of first appearance.
def find_unique_values(value_type, data)
  data.map { |ride| ride[value_type] }.uniq
end

# Return the ride hashes in which id appears as a value.
# Shared by the per-driver calculations below so they all select rides the same way.
def rides_for(id, data)
  data.select { |ride| ride.value?(id) }
end

# - the number of rides each driver has given
def count_total_rides(id, data)
  data.count { |ride| ride.value?(id) }
end

# - the total amount of money each driver has made
# Returns 0 when id has no rides.
def total_ride_cost(id, data)
  rides_for(id, data).sum { |ride| ride[:cost] }
end

# - the average rating for each driver
# Returns the mean rating rounded to one decimal place,
# or 0.0 when id has no rides (instead of dividing by zero).
def calculate_average_rating(id, data)
  rides = rides_for(id, data)
  return 0.0 if rides.empty?

  (rides.sum { |ride| ride[:rating] }.to_f / rides.length).round(1)
end

# Build one summary hash per distinct driver with the keys
# :driver_id, :total_rides, :total_cost and :average_rating.
def summarize_drivers(data)
  find_unique_values(:driver_id, data).map do |driver|
    {
      driver_id: driver,
      total_rides: count_total_rides(driver, data),
      total_cost: total_ride_cost(driver, data),
      average_rating: calculate_average_rating(driver, data)
    }
  end
end

# Return the :driver_id of the summary with the largest value under key,
# or nil when summaries is empty.
def top_driver_by(key, summaries)
  best = summaries.max_by { |summary| summary[key] }
  best && best[:driver_id]
end

line_break
puts 'Driver Summary:'
line_break
driver_summaries = summarize_drivers(ride_share_data)
#test
pp driver_summaries

# - Which driver made the most money?
line_break
puts 'Driver that made the most money:'
line_break
p top_driver_by(:total_cost, driver_summaries)

# - Which driver has the highest average rating?
# The summaries store the rating under :average_rating; looking it up under
# :rating yields nil for every driver, so no real comparison would take place.
line_break
puts 'Driver that has the highest average rating:'
line_break
p top_driver_by(:average_rating, driver_summaries)