library(knitr)
library(rmarkdown)
library(stringi)

The day before Govhack started I was standing in a shop looking out onto a busy Karangahape Road intersection, talking to the shop owner about how there were lots of traffic accidents on the intersection. Mostly they were trying to convince an employee to cross at the pedestrian crossing rather than shooting between parked cars. Someone suggested that surely information about how many accidents happened at that intersection must be online somewhere, and they searched on Google to find it. They couldn’t find it.

This hack attempts to at least answer the question: how many traffic accidents happen at this intersection?

Data source

I will use the disaggregated crash data from NZ Transport Agency (NZTA) to try to answer this question. The data set has been downloaded and is in this repo as a 62MB CSV file, Disaggregated-crash-data.csv. An accompanying metadata file has also been downloaded from NZTA, disaggregated-crash-data-metadata.csv.

Load crash data

## Load the crash data.
## this file is *NOT* in UTF-8, so read up to the first 100000 bytes and let
## stringi make a best guess at the encoding:
rawData = rawToChar(
    readBin("Disaggregated-crash-data.csv", "raw", 100000))
(encoding = stringi::stri_enc_detect(rawData)[[1]]$Encoding[1])
## [1] "ISO-8859-1"
## read the data using this encoding.
## stringsAsFactors is spelled out because R >= 4.0.0 changed the default to
## FALSE; without factors summary() would print Length/Class/Mode for the text
## columns instead of the per-level counts.
crashData = read.csv(file = "Disaggregated-crash-data.csv",
                     stringsAsFactors = TRUE,
                     fileEncoding = encoding)
summary(crashData)
##    CRASH_YEAR     CRASH_FIN_YEAR   CRASH_SEV   fatal_count     
##  Min.   :2000   2007/2008: 12150   F:  5675   Min.   :0.00000  
##  1st Qu.:2004   2006/2007: 11787   M:137600   1st Qu.:0.00000  
##  Median :2008   2008/2009: 11530   S: 34027   Median :0.00000  
##  Mean   :2008   2005/2006: 11134              Mean   :0.03627  
##  3rd Qu.:2012   2009/2010: 11011              3rd Qu.:0.00000  
##  Max.   :2017   2003/2004: 10704              Max.   :9.00000  
##                 (Other)  :108986                               
##  seriousinj_count  minorinj_count                            MULTI_VEH    
##  Min.   : 0.0000   Min.   : 0.00   Multi vehicle                  :82376  
##  1st Qu.: 0.0000   1st Qu.: 1.00   Single vehicle                 :64202  
##  Median : 0.0000   Median : 1.00   Vehicle(s)+Pedestrian(s)       :16108  
##  Mean   : 0.2349   Mean   : 1.08   Vehicle(s)+Cyclist(s) only     :12576  
##  3rd Qu.: 0.0000   3rd Qu.: 1.00   Cyclists only                  : 1046  
##  Max.   :12.0000   Max.   :34.00   Vehicle(s)+multiple other types:  741  
##                                    (Other)                        :  253  
##                HOLIDAY                 LG_REGION_DESC      TLA_ID     
##  Christmas/New Year:  4887   Auckland         :51818   Min.   : 1.00  
##  Easter            :  2079   Canterbury       :22279   1st Qu.: 7.00  
##  Labour Weekend    :  1599   Waikato          :20385   Median :25.00  
##  None              :167103   Wellington       :15284   Mean   :31.35  
##  Queens Birthday   :  1634   Otago            :12809   3rd Qu.:53.00  
##                              Manawatu/Wanganui:10372   Max.   :75.00  
##                              (Other)          :44355                  
##               TLA_NAME         AU_ID            MB_ID        
##  Auckland         :51813   Min.   :     0   Min.   :      0  
##  Christchurch City:14389   1st Qu.:521132   1st Qu.: 663700  
##  Dunedin City     : 7154   Median :542550   Median :1304800  
##  Wellington City  : 6203   Mean   :550463   Mean   :1464681  
##  Hamilton City    : 5096   3rd Qu.:582800   3rd Qu.:2356700  
##  Waikato District : 4131   Max.   :627201   Max.   :3194800  
##  (Other)          :88516   NA's   :59       NA's   :59       
##     EASTING           NORTHING        CRASH_LOCN1    
##  Min.   :      0   Min.   :      0   SH 1N  : 12665  
##  1st Qu.:1613956   1st Qu.:5403987   SH 1S  :  6179  
##  Median :1755778   Median :5714267   SH 2   :  5877  
##  Mean   :1702679   Mean   :5598062   SH 3   :  3352  
##  3rd Qu.:1793828   3rd Qu.:5911801   SH 6   :  3282  
##  Max.   :2110092   Max.   :6189084   SH 16  :  1944  
##                                      (Other):144003  
##            CRASH_LOCN2              OUTDTD_LOCN_DESC   CRASH_RP_RS    
##  SH 1N           :   844   Current location :176410   Min.   :   0.0  
##  SH 2            :   533   Outdated Location:   892   1st Qu.:   0.0  
##  SH 1S           :   427                              Median :   0.0  
##  GREAT SOUTH ROAD:   377                              Mean   : 103.6  
##  SH 3            :   364                              3rd Qu.:  16.0  
##  VICTORIA ST     :   363                              Max.   :1168.0  
##  (Other)         :174394                                              
##        INTERSECTION            JUNCTION_TYPE   CR_RD_SIDE_RD  
##  At Landmark :  2686   Driveway       :14011   Min.   :1.000  
##  Intersection: 60835   Multi Rd Join  : 1310   1st Qu.:1.000  
##  Unknown     :113781   Roundabout     : 5490   Median :1.000  
##                        T Type Junction:36272   Mean   :1.063  
##                        Unknown        :95630   3rd Qu.:1.000  
##                        X Type Junction:22699   Max.   :2.000  
##                        Y Type Junction: 1890                  
##  CRASH_DIRN_DESC   CRASH_DIST       CRASH_RP_DIRN_DESC DIRN_ROLE1_DESC
##       :63522     Min.   :    0.0             :153996   East   :36100  
##  East :22721     1st Qu.:    0.0   Decreasing: 10315   North  :53870  
##  North:33715     Median :   40.0   Increasing: 12991   South  :52382  
##  South:34212     Mean   :  341.4                       Unknown:    2  
##  West :23132     3rd Qu.:  250.0                       West   :34948  
##                  Max.   :34000.0                                      
##                                                                       
##  CRASH_RP_DISP   CRASH_SH_DESC  CRASH_RP_SH      CRASH_RP_NEWS_DESC
##  Min.   :    0   No :119919           :120924             :168064  
##  1st Qu.:    0   Yes: 57383    1N     : 13179   Eastbound :   821  
##  Median :    0                 1S     :  6731   Northbound:  3769  
##  Mean   : 1988                 2      :  6160   Southbound:  3733  
##  3rd Qu.: 1803                 3      :  3671   Westbound :   915  
##  Max.   :23384                 6      :  3365                      
##                                (Other): 23272                      
##       INTSN_MIDBLOCK     FLAT_HILL               ROAD_CHARACTER  
##  Intersection: 66658   Flat   :138219   Bridge          :  3504  
##  Mid Block   :110644   Hill   : 38807   Motorway Ramp   :  1487  
##                        Unknown:   276   Railway Crossing:   461  
##                                         Unknown         :171850  
##                                                                  
##                                                                  
##                                                                  
##         ROAD_CURVATURE   ROAD_LANE           ROAD_MARKINGS   
##  Easy Curve    : 29404    :    61   Centre Line     :114242  
##  Moderate Curve: 27436   1: 11198   No Marks        : 20626  
##  Severe Curve  :  5970   2:163767   No Passing Lines:  9486  
##  Straight Road :114491   O:  2276   Painted Island  :  9757  
##  Unknown       :     1              Ped Crossing    :  2587  
##                                     Raised Island   : 20162  
##                                     Unknown         :   442  
##    ROAD_SURFACE         ROAD_WET        NUM_LANES    
##  Sealed  :172442   Dry      :132406   Min.   :0.000  
##  Unknown :     1   Ice/ Snow:  2103   1st Qu.:2.000  
##  Unsealed:  4859   Unknown  :   309   Median :2.000  
##                    Wet      : 42484   Mean   :2.305  
##                                       3rd Qu.:2.000  
##                                       Max.   :8.000  
##                                                      
##          TRAFFIC_CTRL      SPD_LIM         ADV_SPD        TMP_SPD_LIM    
##  Give Way Sign :29361   050    :93384   Min.   :15.00    Min.   : 10.00  
##  N/A           :40981   100    :64294   1st Qu.:45.00    1st Qu.: 30.00  
##  Nil           :80122   080    : 7894   Median :55.00    Median : 30.00  
##  Points Man    :   28   070    : 6269   Mean   :57.52    Mean   : 43.99  
##  School Patrol :  100   060    : 3794   3rd Qu.:75.00    3rd Qu.: 50.00  
##  Stop Sign     :11634   030    :  937   Max.   :95.00    Max.   :100.00  
##  Traffic Signal:15076   (Other):  730   NA's   :169445   NA's   :175150  
##       URBAN          DARK_LIGHT            LIGHT        STREET_LIGHT  
##  Openroad: 72255   Dark   : 55610   Bright Sun:69244   None   :31032  
##  Urban   :105047   Light  :121579   Dark      :47199   Off    :36308  
##                    Unknown:   113   Overcast  :52335   On     :32507  
##                                     Twilight  : 8411   Unknown:77455  
##                                     Unknown   :  113                  
##                                                                       
##                                                                       
##       WEATHER_A            WEATHER_B       Post.Or.Pole   
##  Fine      :140174   Frost      :  2080   Min.   :0.0000  
##  Heavy Rain:  7485   Strong Wind:  2825   1st Qu.:0.0000  
##  Light Rain: 25692   Unknown    :172397   Median :0.0000  
##  Mist      :  2622                        Mean   :0.0598  
##  Snow      :   404                        3rd Qu.:0.0000  
##  Unknown   :   925                        Max.   :3.0000  
##                                                           
##    Guard.Rail       Water.River         Cliff.Bank          Ditch        
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.02748   Mean   :0.007287   Mean   :0.06232   Mean   :0.05178  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :4.00000   Max.   :2.000000   Max.   :3.00000   Max.   :3.00000  
##                                                                          
##      Fence              Tree             Kerb         Traffic.Island   
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.09298   Mean   :0.0568   Mean   :0.01313   Mean   :0.01046  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :3.00000   Max.   :3.0000   Max.   :3.00000   Max.   :2.00000  
##                                                                        
##  Parked.Vehicle    Traffic.Sign        Vehicle            Bridge        
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.000000  
##  Mean   :0.0524   Mean   :0.01967   Mean   :0.01054   Mean   :0.008212  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :5.0000   Max.   :2.00000   Max.   :4.00000   Max.   :4.000000  
##                                                                         
##    Over.Bank          Animals           Stray.Animal     
##  Min.   :0.00000   Min.   :0.0000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.0000000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.0000000   Median :0.000000  
##  Mean   :0.02634   Mean   :0.0002369   Mean   :0.005962  
##  3rd Qu.:0.00000   3rd Qu.:0.0000000   3rd Qu.:0.000000  
##  Max.   :3.00000   Max.   :1.0000000   Max.   :3.000000  
##                                                          
##  Obj.thrown.dropped      Debris         House.Or.Bldg     
##  Min.   :0.0000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.0000000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.0000000   Median :0.000000   Median :0.000000  
##  Mean   :0.0003553   Mean   :0.002143   Mean   :0.009825  
##  3rd Qu.:0.0000000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :2.0000000   Max.   :3.000000   Max.   :2.000000  
##                                                           
##      Train          Phone.Box.Etc.    Slip.Or.Flood      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.0000000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.0000000  
##  Median :0.000000   Median :0.00000   Median :0.0000000  
##  Mean   :0.001229   Mean   :0.00432   Mean   :0.0007953  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.0000000  
##  Max.   :1.000000   Max.   :2.00000   Max.   :2.0000000  
##                                                          
##    Roadworks             Other        
##  Min.   :0.0000000   Min.   :0.00000  
##  1st Qu.:0.0000000   1st Qu.:0.00000  
##  Median :0.0000000   Median :0.00000  
##  Mean   :0.0006035   Mean   :0.01092  
##  3rd Qu.:0.0000000   3rd Qu.:0.00000  
##  Max.   :2.0000000   Max.   :2.00000  
## 
## number of rows, then number of columns, in the crash data
c(nrow(crashData), ncol(crashData))
## [1] 177302     73

Excellent, 177302 rows in 73 columns of data. What do the variables represent?

Load crash metadata

## Load the metadata that describes the crash data variables.
## this file is *NOT* in UTF-8 either. best guess at encoding, based on up to
## the first 100000 bytes:
rawMeta = readBin("disaggregated-crash-data-metadata.csv", "raw", 100000)
rawMeta = rawToChar(rawMeta)
(encoding = stringi::stri_enc_detect(rawMeta)[[1]][["Encoding"]][1])
## [1] "windows-1252"
## read the data using this encoding (name passed in upper case)
crashMetadata = read.csv(file = "disaggregated-crash-data-metadata.csv",
                         fileEncoding = toupper(encoding),
                         stringsAsFactors = FALSE)
names(crashMetadata)
## [1] "Variable.Name" "Description"   "X"             "X.1"          
## [5] "X.2"
## what's in columns 3:5?
head(crashMetadata[3:5])
##    X X.1 X.2
## 1 NA  NA  NA
## 2 NA  NA  NA
## 3 NA  NA  NA
## 4 NA  NA  NA
## 5 NA  NA  NA
## 6 NA  NA  NA
## nothing?
all(is.na(crashMetadata[3:5]))
## [1] TRUE
## drop them, keeping only the variable name and its description
crashMetadata = crashMetadata[1:2]
## print the table
knitr::kable(crashMetadata)
Variable.Name Description
crash_year The year in which a crash occurred, if known.
crash_fin_year The financial (fin) year in which a crash occurred, if known.
crash_sev The severity of a crash. Possible values are ‘F’ (fatal), ‘S’ (serious), ‘M’ (minor). This is determined by the worst injury sustained in the crash at time of entry.
fatal_count A count of the number of fatal casualties associated with this crash.
seriousinj_count A count of the number of serious injuries (inj) associated with this crash.
minorinj_count A count of the number of minor injuries (inj) associated with this crash.
multi_veh A variable derived from the number of vehicles which are given roles in the crash. The variable has the following possible values; ‘single vehicle’, ‘multi-vehicle’ , ‘cyclist and vehicle(s)’, ‘pedestrian and vehicle(s)’ , ‘cyclist only’, ‘cyclist(s) and pedestrian(s)’ , ‘vehicle(s) and other’, ‘others, no vehicles’ and ‘other’. ‘Vehicle’ means non-parked vehicle. Parked vehicles are treated as objects in a crash.
holiday Indicates where a crash occurred during a ‘Christmas/New Year’, ‘Easter’, ‘Queens Birthday’ or ‘Labour Weekend’ holiday period, otherwise ‘None’.
lg_region_desc Identifies the local government (LG) region. The boundaries match territorial local authority (TLA) boundaries in most places. A blank cell occurs where the crash is said to have occurred in a particular TLA and the LG boundaries do not match.
tla_id The unique identifier for a territorial local authority (TLA). Each crash is assigned a TLA based on where the crash occurred.
tla_name The name of the territorial local authority (TLA) the crash has been attributed.
au_id The unique identifier of an area unit.
mb_id The unique identifier of a meshblock.
easting The easting coordinate of an object (usually a crash) expressed in NZMG referred to the WGS84 datum to a precision of 1m.
northing The northing coordinate of an object (usually a crash) expressed in NZMG referred to the WGS84 datum to a precision of 1m.
crash_locn1 Part 1 of the ‘crash location’ (crash_locn). May be a road name, route position (RP), landmark, or other, e.g. ‘Ninety Mile Beach’. Used for location descriptions in reports etc.
crash_locn2 Part 2 of the ‘crash location’ (crash_locn). May be a side road name, landmark etc. Used for location descriptions in reports etc.
outdtd_locn_desc Indicates if the location for this crash is an ‘Outdated Location’(outdtd_locn) or ‘Current location’. A crash is said to have an ‘Outdated location’ where the road might have moved, or does not exist anymore.
crash_rp_rs The ‘reference station’ (RS) for the ‘route position’ (RP) of a crash.
intersection Indicate if a crash happened at an ‘Intersection’, ‘At Landmark’ or ‘Unknown’.
junction_type The type of junction the crash happened at. Possible road junctions include ‘Driveway’, ‘Roundabout’, ‘X Type Junction’, ‘T Type Junction’, ‘Y Type Junction’, or ‘Multi Road Join’. The junction type may also be unknown. Note crashes that did not occur at a junction are also given a value of unknown.
cr_rd_side_rd Indicates whether the principal vehicle in a crash was on the crash road (cr_rd) [1] or side road (sd_rd) [2] at the time of the crash. Note that ‘on side road’ (2) can only happen if the crash occurred at an intersection.
crash_dirn_desc The direction (dirn) of the crash from the reference point. Values possible are ‘North’, ‘East’, ‘South’ or ‘West’.
crash_dist The distance (dist) of the crash from the reference point for the crash. The reference point is often the intersection of ‘crash road’ and ‘side road’ (refer to ‘cr_rd_sd_rd’ variable).
crash_rp_dirn_desc Indicates the direction of travel (where known) on a State Highway (SH) with respect to the highway origin. Possible values include ‘Increasing’ where the crash occurred in increasing distance from SH origin, ‘Decreasing’ where the crash occurred in decreasing distance to the SH origin, or blank.
dirn_role1_desc The direction (dirn) of the principal vehicle involved in the crash. Possible values are North, South, East or West.
crash_rp_disp The displacement (disp) of the crash from a reference station (RS). Part of the crash route position (RP).
crash_sh_desc Indicates where a crash is reported to have occurred on a State Highway (SH). Possible values include ‘Yes’ where the crash occurred on a SH, otherwise ‘No’.
crash_rp_sh The State Highway (SH) on which a crash occurred. This is part of a ‘route position’ (RP) for the crash. Possible values can be any valid natural SH designation reference (e.g. ‘1N’ is SH1 in the North Island.).
crash_rp_news_desc Where the crash occurred on a median-divided State Highway (SH), this flag indicates which side of the median the crash happened. Values ‘Northbound’, ‘Southbound, ’Eastbound’ or ‘Westbound’.
intsn_midblock A derived variable to indicate if a crash occured at an intersection (intsn) or not. The ‘intsn_midblock’ variable is calculated using the ‘intersection’ and ‘junction_type’ variables. Values are ‘Intersection’ (where intersection variable = ‘Intersection’ or {‘Intersection’ = ‘At Landmark’ and junction_type is not in (‘Unknown’ or ‘Driveway’)} OR {Intersection = ‘Unknown’ and crash_dist <= 10}), otherwise ‘Midblock’ for crashes not meeting the criteria for ‘Intersection’).
flat_hill Whether the road is flat or sloped. Possible values include ‘Flat or ’Hill’.
road_character The general nature of the road. Possible values include ‘Bridge’, ‘Motorway Ramp’, ‘Railway Crossing’ or ‘Unknown’.
road_curvature The curvature of the road. Possible values include ‘Straight Road’, ‘Easy Curve’, ‘Moderate Curve’ or ‘Severe curve’.
road_lane The lane configuration of the road. Possible values : ‘1’ (one way), ‘2’ (two way), ‘M’ (for where a median exists), ‘O’ (for off-road lane configuations), ‘’ ( for unknown or invalid configuarations).
road_markings The road markings at the crash site. Possible values: ‘Ped Crossing’ (for pedestrian crossings), ‘Raised Island’, ‘Painted Island’, ‘No Passing Lanes’, ‘Centre Line’, ‘No Marks’ or ‘Unknown’.
road_surface The road surface description applying at the crash site. Possible values: ‘Sealed’ or ‘Unsealed’.
road_wet The road wetness at the time and place of the crash. Possible values: ‘Wet’, ‘Dry’, ‘Ice/Snow’ or ‘Unknown’
num_lanes The number(num) of lanes on the crash road.
traffic_ctrl The traffic control (ctrl) signals at the crash site. Possible values are ‘Traffic Signals’, ‘Stop Sign’, ‘Give Way Sign’, ‘Pointsman’, ‘School Patrol’, ‘Nil’ or ‘N/A’.
spd_lim The speed (spd) limit (lim) in force at the crash site at the time of the crash. May be a number, or ‘LSZ’ for a limited speed zone.
adv_spd The advisory (adv) speed (spd) at the crash site at the time of the crash.
tmp_spd_lim The temporary (temp) speed (spd) limit (lim) at the crash site if one exists (e.g. for road works).
urban A derived variable using the ‘spd_lim’ variable. Possible values are ‘Urban’ (urban, spd_lim < 80) or ‘Open Road’ (open road, spd_lim >=80 or ‘LSZ’).
dark_light A variable derived from the ‘light’ variable. Values ‘Dark’ (if ‘light’ = ‘Dark’ or ‘Twilight’), ‘Light’ ( ‘light’ = ‘Bright’, ‘Overcast’) or ‘Unknown’ (light = ‘’).
light The light at the time and place of the crash. Possible values: ‘Bright Sun’, ‘Overcast’, ‘Twilight, ’Dark’ or ‘Unknown’.
street_light The street lighting at the time of the crash. Possible values ‘On’, ‘Off’, ‘None’ or ‘Unknown’.
weather_a Indicates weather at the crash time/place. See wthr_b. Values that are possible are ‘Fine’, ‘Mist’, ‘Light Rain’, ‘Heavy Rain’, ‘Snow’, ‘Unknown’.
weather_b The weather at the crash time/place. See weather_a. Values ‘Frost’, ‘Strong Wind’ or ‘Unknown’.
Post or Pole Derived variable to indicate how many times a post or pole was struck in the crash. This includes light, power, phone, utility poles and objects practically forming part of a pole (i.e. ‘Transformer Guy’ wires)
Guard Rail Derived variable to indicate how many times a guard or guard rail was struck in the crash. This includes ‘New Jersey’ barriers, ‘ARMCO’, sand filled barriers, wire catch fences, etc.
Water/River Derived variable to indicate how many times a body of water (including rivers, streams, lakes, the sea, tidal flates, canals, watercourses or swanps) was struck in the crash.
Cliff Bank Derived variable to indicate how many times a ‘cliff’ or ‘bank’ was struck in the crash. This includes retaining walls
Ditch Derived variable to indicate how many times a ‘ditch’ or ‘waterable drainage channel’ was struck in a crash.
Fence Derived variable to indicate how many times a ‘fence’ was struck in the crash. This includes letterbox(es), hoardings, private roadside furniture, hedges, sight rails, etc.
Kerb Derived variable to indicate how many times a kerb was struck in the crash, that contributed directly to the crash.
Traffic Island Derived variable to indicate how many times a traffic island, medians (excluding barriers)was struck in the crash.
Parked Vehicle Derived variable to indicate how many times a parked or unattended vehicle was struck in the crash. This variable can include trailers.
Traffic Sign Derived variable to indicate how many times ‘traffic signage’ (including traffic signals, their poles, bollards or roadside delineators) was struck in the crash.
Vehicle Derived variable to indicate how many times a stationary attended vehicle was struck in the crash. This includes broken down vehicles, workmen’s vehicles, taxis, buses.
Bridge Derived variable to indicate how many times a bridge, tunnel, the abutments, handrails were struck in the crash.
Over Bank Derived variable to indicate how many times an embankment was struck or driven over during a crash. This variable includes other vertical drops driven over during a crash.
Animals Derived variable to indicate how many times an ‘Animal(s)’ was struck in the crash. This is used where the animals, being driven or led, were under control.
Stray Animal Derived variable to indicate how many times a stray animal(s) was struck in the crash. This variable includes wild animals such as pigs, goats, deer, straying farm animals, house pets and birds.
Obj thrown/dropped Derived variable to indicate how many times objects were thrown at or dropped on vehicles in the crash.
Debris Derived variable to indicate how many times debris, boulders or items dropped or thrown from a vehicle(s) were struck in the crash
House Or Bldg Derived variable to indicate how many times a houses, garages, sheds or other buildings(Bldg) were struck in the crash
Train Derived variable to indicate how many times a train, rolling stock or jiggers was struck in the crash, whether stationary or moving
Phone Box Etc Derived variable to indicate how many times a telephone kiosk traffic signal controllers, bus shelters or other public furniture was struck in the crash
Slip Or Flood Derived variable to indicate how many times landslips, washouts or floods (excluding rivers) were objects struck in the crash
Roadworks Derived variable to indicate how many times an object associated with ‘roadworks’ (including signs, cones, drums, barriers, but not roadwork vehicles) was struck during the crash
Trees Derived variable to indicate how many times trees or other growing items were struck during the crash.
Other Derived variable to indicate how many times an object was struck in a crash and the object struck was not pre-defined. This variable includes stockpiled materials, rubbish bins, fallen poles, fallen trees, etc.

Keen spotters will notice that the metadata and the column names have inconsistent capitalisation of variables, so I’ll just make all the names of my data columns lower case to prevent confusion (maybe).

## lower-case every column name of the crash data
colnames(crashData) = tolower(colnames(crashData))

Useful things I can use:

The location

map of intersection of Karangahape and Newton Roads, Auckland © OpenStreetMap contributors

map of intersection of Karangahape and Newton Roads, Auckland © OpenStreetMap contributors

The intersection in question is Karangahape and Newton Roads, Newton, Auckland. The street names I will search for are:

## Street names to search for, converted to upper case before matching
## against the crash location columns.
streetNames = toupper(c(
    "Abbey St",         # for some reason street = "ST"
    "Great North Road", # while road = "ROAD"
    "Gundry St",    # go figure
    "Karangahape Road",
    "Newton Road",
    "Ponsonby Road"))

## check that every street name occurs in the data; %in% is already
## vectorised over streetNames, so no sapply() wrapper is needed
all(streetNames %in% crashData$crash_locn1)
## [1] TRUE
all(streetNames %in% crashData$crash_locn2)
## [1] TRUE

I will take a subset of the data where one of these street names occurs in both crash_locn1 and crash_locn2.

## keep only the crashes whose two location columns both name one of the
## streets of interest
intersectionData = crashData[
    with(crashData,
         crash_locn1 %in% streetNames & crash_locn2 %in% streetNames), ]

## check that we're in the right part of the country: list the distinct
## region / territorial authority combinations left in the subset
unique(intersectionData[c("lg_region_desc", "tla_name")])
##      lg_region_desc tla_name
## 1085       Auckland Auckland
## that said Auckland, right? ✓

Map co-ordinates

Why didn’t I use the northing and easting variables to do some fancy map stuff? Because I have no idea what the heck they are! Somebody from NZTA, please, explain to me how I can convert these numbers into map co-ordinates for modern internet humans! Let me explain.

Here are the extreme northing and easting values for the rows in my data subset:

## bounding box of the subset: smallest and largest northing / easting.
## NOTE(review): values of this magnitude may be NZTM2000 rather than NZMG
## as the metadata states -- TODO confirm against LINZ documentation.
(mapcoords = with(intersectionData, c(
    minNorth = min(northing),
    minEast = min(easting),
    maxNorth = max(northing),
    maxEast = max(easting)
)))
## minNorth  minEast maxNorth  maxEast 
##  5919188  1756123  5919343  1756399

The metadata tells us that easting is “The easting coordinate of an object (usually a crash) expressed in NZMG referred to the WGS84 datum to a precision of 1m.”, so I just need to find out about NZMG. NZTA is not much further help on this, but Land Information New Zealand (LINZ) tells me that this is New Zealand Map Grid, and they even have a convenient online tool to convert NZMG to WGS84 (World Geodetic System 1984). This looks like it is probably latitude and longitude as I know them, so what happens when I put the co-ordinates above into the conversion tool?

Errors in the data
Point 1: Coordinates out of range for grid
Point 2: Coordinates out of range for grid

There are no coordinates available to convert.

This is no help at all! So I gave up on map co-ordinates.

How many crashes?

## first and last year present in the subset, and the number of crash records
minYear = min(intersectionData[["crash_year"]])
maxYear = max(intersectionData[["crash_year"]])
nCrashes = dim(intersectionData)[1]

According to the data there were 51 crashes on our intersection between 2000 and 2016.

## Count crashes per year. Tabulating a factor with one level per year in
## minYear:maxYear keeps years without any crash as zero-height bars, so the
## x axis is a continuous run of years (matching the zero-filled
## serious-injury chart further down).
crashesByYear = table(
    factor(intersectionData$crash_year, levels = minYear:maxYear))
barplot(
    height = crashesByYear,
    las = 2,
    main = "Number of crashes near K Road and Ponsonby Road intersection",
    xlab = "year",
    ylab = "crashes")

## total number of injuries of each severity across all crashes in the subset
nFatalInjuries = with(intersectionData, sum(fatal_count))
nSeriousInjuries = with(intersectionData, sum(seriousinj_count))
nMinorInjuries = with(intersectionData, sum(minorinj_count))

There have been 0 fatal injuries, 9 serious injuries, and 54 minor injuries, in these 51 crashes.

## Sum the serious and the minor injuries per year.
## The formula is passed positionally: the formula method of aggregate()
## renamed its first argument from `formula` to `x` in R 4.2.0, so naming it
## is not portable across R versions.
seriousInjuriesByYear = aggregate(
    seriousinj_count ~ crash_year,
    data = intersectionData,
    FUN = "sum")
minorInjuriesByYear = aggregate(
    minorinj_count ~ crash_year,
    data = intersectionData,
    FUN = "sum")
## one row per year: crash_year, seriousinj_count, minorinj_count
injuriesByYear = merge(
    x = seriousInjuriesByYear,
    y = minorInjuriesByYear)
## aggregate() only returns years that have at least one crash; add the
## remaining years of minYear:maxYear with zero injuries so the chart shows
## every year (as the serious-injury crash chart does)
injuriesByYear = merge(
    x = injuriesByYear,
    y = data.frame(crash_year = minYear:maxYear),
    all.y = TRUE)
injuriesByYear[is.na(injuriesByYear)] = 0

## grouped bars: columns 2:3 are transposed so each year gets a
## serious / minor pair of bars
barplot(
    height = t(injuriesByYear[, 2:3]),
    args.legend = list(x = "top"),
    beside = TRUE,
    las = 2,
    legend.text = c("serious injuries", "minor injuries"),
    main = "Number of injuries by severity",
    names.arg = injuriesByYear[, 1],
    xlab = "year",
    ylab = "injuries")

## number of crashes with at least one serious injury, and number of crashes
## with at least one minor injury
nSeriousCrashes = sum(with(intersectionData, seriousinj_count > 0))
nMinorCrashes = sum(with(intersectionData, minorinj_count > 0))

9 crashes resulted in serious injuries, and 43 crashes resulted in minor injuries.

## Count, per year, the crashes that caused at least one serious injury.
## The formula is passed positionally: the formula method of aggregate()
## renamed its first argument from `formula` to `x` in R 4.2.0, so naming it
## is not portable across R versions.
crashesSeriousYear = aggregate(
    seriousinj_count ~ crash_year,
    data = intersectionData[intersectionData$seriousinj_count > 0, ],
    FUN = "length")
## aggregate() only returns years that have at least one such crash; merge
## against every year in minYear:maxYear and turn the resulting NAs into 0
crashesSeriousYear = merge(
    x = crashesSeriousYear,
    y = data.frame(crash_year = minYear:maxYear),
    all.y = TRUE)
crashesSeriousYear[is.na(crashesSeriousYear)] = 0

barplot(
    height = crashesSeriousYear[, 2],
    axes = FALSE,
    las = 2,
    main = "Crashes resulting in serious injury",
    names.arg = crashesSeriousYear[, 1],
    xlab = "year",
    ylab = "crashes")
## the default axis was suppressed above; draw a y axis with a tick at every
## whole number between the smallest and largest count
axis(side = 2,
     at = min(crashesSeriousYear[, 2]):max(crashesSeriousYear[, 2]),
     las = 2)

## Count, per year, the crashes with at least one minor injury and no serious
## injury.
## The formula is passed positionally: the formula method of aggregate()
## renamed its first argument from `formula` to `x` in R 4.2.0, so naming it
## is not portable across R versions.
crashesMinorYear = aggregate(
    minorinj_count ~ crash_year,
    data = intersectionData[
        intersectionData$minorinj_count > 0
        & intersectionData$seriousinj_count == 0, ],
    FUN = "length")
## aggregate() only returns years that have at least one such crash; merge
## against every year in minYear:maxYear and turn the resulting NAs into 0,
## exactly as is done for the serious-injury chart
crashesMinorYear = merge(
    x = crashesMinorYear,
    y = data.frame(crash_year = minYear:maxYear),
    all.y = TRUE)
crashesMinorYear[is.na(crashesMinorYear)] = 0

barplot(
    height = crashesMinorYear[, 2],
    las = 2,
    main = "Crashes resulting in minor injury",
    names.arg = crashesMinorYear[, 1],
    xlab = "year",
    ylab = "crashes")

## label every crash with its two location fields joined by " & ", then
## count the crashes per label, sorted by count in increasing order
crashLocation = with(
    intersectionData,
    paste(crash_locn1, crash_locn2, sep = " & "))
crashLocation = sort(table(crashLocation))

## widen the left margin so the long horizontal labels fit; par() returns
## the previous settings, which are restored after plotting
opar = par(mar = c(5.1, 20.1, 4.1, 2.1))
barplot(crashLocation, horiz = TRUE, las = 1, main = "Crashes by location")

par(opar)