library(knitr)
library(rmarkdown)
library(stringi)
The day before Govhack started I was standing in a shop looking out onto a busy Karangahape Road intersection, talking to the shop owner about how there were lots of traffic accidents on the intersection. Mostly they were trying to convince an employee to cross at the pedestrian crossing rather than shooting between parked cars. Someone suggested that surely information about how many accidents happened at that intersection must be online somewhere, and they searched on Google to find it. They couldn’t find it.
This hack attempts to at least answer the question: how many traffic accidents happen at this intersection?
I will use the disaggregated crash data from NZ Transport Agency (NZTA) to try to answer this question. The data set has been downloaded and is in this repo as a 62MB CSV file, Disaggregated-crash-data.csv. An accompanying metadata file has also been downloaded from NZTA, disaggregated-crash-data-metadata.csv.
## the crash CSV is not UTF-8, so guess its encoding from the first 100000 bytes
rawData <- readBin(con = "Disaggregated-crash-data.csv", what = "raw", n = 100000)
rawData <- rawToChar(rawData)
(encoding <- stringi::stri_enc_detect(rawData)[[1]][["Encoding"]][1])
## [1] "ISO-8859-1"
## load the whole table, decoding it with the top-ranked encoding guess
crashData <- read.csv(
  "Disaggregated-crash-data.csv",
  fileEncoding = encoding)
summary(crashData)
## CRASH_YEAR CRASH_FIN_YEAR CRASH_SEV fatal_count
## Min. :2000 2007/2008: 12150 F: 5675 Min. :0.00000
## 1st Qu.:2004 2006/2007: 11787 M:137600 1st Qu.:0.00000
## Median :2008 2008/2009: 11530 S: 34027 Median :0.00000
## Mean :2008 2005/2006: 11134 Mean :0.03627
## 3rd Qu.:2012 2009/2010: 11011 3rd Qu.:0.00000
## Max. :2017 2003/2004: 10704 Max. :9.00000
## (Other) :108986
## seriousinj_count minorinj_count MULTI_VEH
## Min. : 0.0000 Min. : 0.00 Multi vehicle :82376
## 1st Qu.: 0.0000 1st Qu.: 1.00 Single vehicle :64202
## Median : 0.0000 Median : 1.00 Vehicle(s)+Pedestrian(s) :16108
## Mean : 0.2349 Mean : 1.08 Vehicle(s)+Cyclist(s) only :12576
## 3rd Qu.: 0.0000 3rd Qu.: 1.00 Cyclists only : 1046
## Max. :12.0000 Max. :34.00 Vehicle(s)+multiple other types: 741
## (Other) : 253
## HOLIDAY LG_REGION_DESC TLA_ID
## Christmas/New Year: 4887 Auckland :51818 Min. : 1.00
## Easter : 2079 Canterbury :22279 1st Qu.: 7.00
## Labour Weekend : 1599 Waikato :20385 Median :25.00
## None :167103 Wellington :15284 Mean :31.35
## Queens Birthday : 1634 Otago :12809 3rd Qu.:53.00
## Manawatu/Wanganui:10372 Max. :75.00
## (Other) :44355
## TLA_NAME AU_ID MB_ID
## Auckland :51813 Min. : 0 Min. : 0
## Christchurch City:14389 1st Qu.:521132 1st Qu.: 663700
## Dunedin City : 7154 Median :542550 Median :1304800
## Wellington City : 6203 Mean :550463 Mean :1464681
## Hamilton City : 5096 3rd Qu.:582800 3rd Qu.:2356700
## Waikato District : 4131 Max. :627201 Max. :3194800
## (Other) :88516 NA's :59 NA's :59
## EASTING NORTHING CRASH_LOCN1
## Min. : 0 Min. : 0 SH 1N : 12665
## 1st Qu.:1613956 1st Qu.:5403987 SH 1S : 6179
## Median :1755778 Median :5714267 SH 2 : 5877
## Mean :1702679 Mean :5598062 SH 3 : 3352
## 3rd Qu.:1793828 3rd Qu.:5911801 SH 6 : 3282
## Max. :2110092 Max. :6189084 SH 16 : 1944
## (Other):144003
## CRASH_LOCN2 OUTDTD_LOCN_DESC CRASH_RP_RS
## SH 1N : 844 Current location :176410 Min. : 0.0
## SH 2 : 533 Outdated Location: 892 1st Qu.: 0.0
## SH 1S : 427 Median : 0.0
## GREAT SOUTH ROAD: 377 Mean : 103.6
## SH 3 : 364 3rd Qu.: 16.0
## VICTORIA ST : 363 Max. :1168.0
## (Other) :174394
## INTERSECTION JUNCTION_TYPE CR_RD_SIDE_RD
## At Landmark : 2686 Driveway :14011 Min. :1.000
## Intersection: 60835 Multi Rd Join : 1310 1st Qu.:1.000
## Unknown :113781 Roundabout : 5490 Median :1.000
## T Type Junction:36272 Mean :1.063
## Unknown :95630 3rd Qu.:1.000
## X Type Junction:22699 Max. :2.000
## Y Type Junction: 1890
## CRASH_DIRN_DESC CRASH_DIST CRASH_RP_DIRN_DESC DIRN_ROLE1_DESC
## :63522 Min. : 0.0 :153996 East :36100
## East :22721 1st Qu.: 0.0 Decreasing: 10315 North :53870
## North:33715 Median : 40.0 Increasing: 12991 South :52382
## South:34212 Mean : 341.4 Unknown: 2
## West :23132 3rd Qu.: 250.0 West :34948
## Max. :34000.0
##
## CRASH_RP_DISP CRASH_SH_DESC CRASH_RP_SH CRASH_RP_NEWS_DESC
## Min. : 0 No :119919 :120924 :168064
## 1st Qu.: 0 Yes: 57383 1N : 13179 Eastbound : 821
## Median : 0 1S : 6731 Northbound: 3769
## Mean : 1988 2 : 6160 Southbound: 3733
## 3rd Qu.: 1803 3 : 3671 Westbound : 915
## Max. :23384 6 : 3365
## (Other): 23272
## INTSN_MIDBLOCK FLAT_HILL ROAD_CHARACTER
## Intersection: 66658 Flat :138219 Bridge : 3504
## Mid Block :110644 Hill : 38807 Motorway Ramp : 1487
## Unknown: 276 Railway Crossing: 461
## Unknown :171850
##
##
##
## ROAD_CURVATURE ROAD_LANE ROAD_MARKINGS
## Easy Curve : 29404 : 61 Centre Line :114242
## Moderate Curve: 27436 1: 11198 No Marks : 20626
## Severe Curve : 5970 2:163767 No Passing Lines: 9486
## Straight Road :114491 O: 2276 Painted Island : 9757
## Unknown : 1 Ped Crossing : 2587
## Raised Island : 20162
## Unknown : 442
## ROAD_SURFACE ROAD_WET NUM_LANES
## Sealed :172442 Dry :132406 Min. :0.000
## Unknown : 1 Ice/ Snow: 2103 1st Qu.:2.000
## Unsealed: 4859 Unknown : 309 Median :2.000
## Wet : 42484 Mean :2.305
## 3rd Qu.:2.000
## Max. :8.000
##
## TRAFFIC_CTRL SPD_LIM ADV_SPD TMP_SPD_LIM
## Give Way Sign :29361 050 :93384 Min. :15.00 Min. : 10.00
## N/A :40981 100 :64294 1st Qu.:45.00 1st Qu.: 30.00
## Nil :80122 080 : 7894 Median :55.00 Median : 30.00
## Points Man : 28 070 : 6269 Mean :57.52 Mean : 43.99
## School Patrol : 100 060 : 3794 3rd Qu.:75.00 3rd Qu.: 50.00
## Stop Sign :11634 030 : 937 Max. :95.00 Max. :100.00
## Traffic Signal:15076 (Other): 730 NA's :169445 NA's :175150
## URBAN DARK_LIGHT LIGHT STREET_LIGHT
## Openroad: 72255 Dark : 55610 Bright Sun:69244 None :31032
## Urban :105047 Light :121579 Dark :47199 Off :36308
## Unknown: 113 Overcast :52335 On :32507
## Twilight : 8411 Unknown:77455
## Unknown : 113
##
##
## WEATHER_A WEATHER_B Post.Or.Pole
## Fine :140174 Frost : 2080 Min. :0.0000
## Heavy Rain: 7485 Strong Wind: 2825 1st Qu.:0.0000
## Light Rain: 25692 Unknown :172397 Median :0.0000
## Mist : 2622 Mean :0.0598
## Snow : 404 3rd Qu.:0.0000
## Unknown : 925 Max. :3.0000
##
## Guard.Rail Water.River Cliff.Bank Ditch
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.02748 Mean :0.007287 Mean :0.06232 Mean :0.05178
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :4.00000 Max. :2.000000 Max. :3.00000 Max. :3.00000
##
## Fence Tree Kerb Traffic.Island
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.09298 Mean :0.0568 Mean :0.01313 Mean :0.01046
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :3.00000 Max. :3.0000 Max. :3.00000 Max. :2.00000
##
## Parked.Vehicle Traffic.Sign Vehicle Bridge
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.0524 Mean :0.01967 Mean :0.01054 Mean :0.008212
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :5.0000 Max. :2.00000 Max. :4.00000 Max. :4.000000
##
## Over.Bank Animals Stray.Animal
## Min. :0.00000 Min. :0.0000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.0000000 1st Qu.:0.000000
## Median :0.00000 Median :0.0000000 Median :0.000000
## Mean :0.02634 Mean :0.0002369 Mean :0.005962
## 3rd Qu.:0.00000 3rd Qu.:0.0000000 3rd Qu.:0.000000
## Max. :3.00000 Max. :1.0000000 Max. :3.000000
##
## Obj.thrown.dropped Debris House.Or.Bldg
## Min. :0.0000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.0000000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.0000000 Median :0.000000 Median :0.000000
## Mean :0.0003553 Mean :0.002143 Mean :0.009825
## 3rd Qu.:0.0000000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :2.0000000 Max. :3.000000 Max. :2.000000
##
## Train Phone.Box.Etc. Slip.Or.Flood
## Min. :0.000000 Min. :0.00000 Min. :0.0000000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.0000000
## Median :0.000000 Median :0.00000 Median :0.0000000
## Mean :0.001229 Mean :0.00432 Mean :0.0007953
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.0000000
## Max. :1.000000 Max. :2.00000 Max. :2.0000000
##
## Roadworks Other
## Min. :0.0000000 Min. :0.00000
## 1st Qu.:0.0000000 1st Qu.:0.00000
## Median :0.0000000 Median :0.00000
## Mean :0.0006035 Mean :0.01092
## 3rd Qu.:0.0000000 3rd Qu.:0.00000
## Max. :2.0000000 Max. :2.00000
##
## how many rows and columns were read?
dim(crashData)
## [1] 177302 73
Excellent, 177302 rows in 73 columns of data. What do the variables represent?
## the metadata CSV is not UTF-8 either: guess its encoding from the first 100000 bytes
rawMeta <- readBin(con = "disaggregated-crash-data-metadata.csv", what = "raw", n = 100000)
rawMeta <- rawToChar(rawMeta)
(encoding <- stringi::stri_enc_detect(rawMeta)[[1]][["Encoding"]][1])
## [1] "windows-1252"
## load the metadata as plain strings, passing the encoding name in upper case
crashMetadata <- read.csv(
  "disaggregated-crash-data-metadata.csv",
  fileEncoding = toupper(encoding),
  stringsAsFactors = FALSE)
names(crashMetadata)
## [1] "Variable.Name" "Description" "X" "X.1"
## [5] "X.2"
## columns 3 to 5 only have placeholder names -- peek at their first rows
head(crashMetadata[, 3:5])
## X X.1 X.2
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## are they empty all the way down?
all(is.na(crashMetadata[, 3:5]))
## [1] TRUE
## yes: keep just the first two columns
crashMetadata <- crashMetadata[1:2]
## render the variable descriptions as a table
knitr::kable(crashMetadata)
Variable.Name | Description |
---|---|
crash_year | The year in which a crash occurred, if known. |
crash_fin_year | The financial (fin) year in which a crash occurred, if known. |
crash_sev | The severity of a crash. Possible values are ‘F’ (fatal), ‘S’ (serious), ‘M’ (minor). This is determined by the worst injury sustained in the crash at time of entry. |
fatal_count | A count of the number of fatal casualties associated with this crash. |
seriousinj_count | A count of the number of serious injuries (inj) associated with this crash. |
minorinj_count | A count of the number of minor injuries (inj) associated with this crash. |
multi_veh | A variable derived from the number of vehicles which are given roles in the crash. The variable has the following possible values; ‘single vehicle’, ‘multi-vehicle’ , ‘cyclist and vehicle(s)’, ‘pedestrian and vehicle(s)’ , ‘cyclist only’, ‘cyclist(s) and pedestrian(s)’ , ‘vehicle(s) and other’, ‘others, no vehicles’ and ‘other’. ‘Vehicle’ means non-parked vehicle. Parked vehicles are treated as objects in a crash. |
holiday | Indicates where a crash occurred during a ‘Christmas/New Year’, ‘Easter’, ‘Queens Birthday’ or ‘Labour Weekend’ holiday period, otherwise ‘None’. |
lg_region_desc | Identifies the local government (LG) region. The boundaries match territorial local authority (TLA) boundaries in most places. A blank cell occurs where the crash is said to have occurred in a particular TLA and the LG boundaries do not match. |
tla_id | The unique identifier for a territorial local authority (TLA). Each crash is assigned a TLA based on where the crash occurred. |
tla_name | The name of the territorial local authority (TLA) the crash has been attributed. |
au_id | The unique identifier of an area unit. |
mb_id | The unique identifier of a meshblock. |
easting | The easting coordinate of an object (usually a crash) expressed in NZMG referred to the WGS84 datum to a precision of 1m. |
northing | The northing coordinate of an object (usually a crash) expressed in NZMG referred to the WGS84 datum to a precision of 1m. |
crash_locn1 | Part 1 of the ‘crash location’ (crash_locn). May be a road name, route position (RP), landmark, or other, e.g. ‘Ninety Mile Beach’. Used for location descriptions in reports etc. |
crash_locn2 | Part 2 of the ‘crash location’ (crash_locn). May be a side road name, landmark etc. Used for location descriptions in reports etc. |
outdtd_locn_desc | Indicates if the location for this crash is an ‘Outdated Location’(outdtd_locn) or ‘Current location’. A crash is said to have an ‘Outdated location’ where the road might have moved, or does not exist anymore. |
crash_rp_rs | The ‘reference station’ (RS) for the ‘route position’ (RP) of a crash. |
intersection | Indicate if a crash happened at an ‘Intersection’, ‘At Landmark’ or ‘Unknown’. |
junction_type | The type of junction the crash happened at. Possible road junctions include ‘Driveway’, ‘Roundabout’, ‘X Type Junction’, ‘T Type Junction’, ‘Y Type Junction’, or ‘Multi Road Join’. The junction type may also be unknown. Note crashes that did not occur at a junction are also given a value of unknown. |
cr_rd_side_rd | Indicates whether the principal vehicle in a crash was on the crash road (cr_rd) [1] or side road (sd_rd) [2] at the time of the crash. Note that ‘on side road’ (2) can only happen if the crash occurred at an intersection. |
crash_dirn_desc | The direction (dirn) of the crash from the reference point. Values possible are ‘North’, ‘East’, ‘South’ or ‘West’. |
crash_dist | The distance (dist) of the crash from the reference point for the crash. The reference point is often the intersection of ‘crash road’ and ‘side road’ (refer to ‘cr_rd_sd_rd’ variable). |
crash_rp_dirn_desc | Indicates the direction of travel (where known) on a State Highway (SH) with respect to the highway origin. Possible values include ‘Increasing’ where the crash occurred in increasing distance from SH origin, ‘Decreasing’ where the crash occurred in decreasing distance to the SH origin, or blank. |
dirn_role1_desc | The direction (dirn) of the principal vehicle involved in the crash. Possible values are North, South, East or West. |
crash_rp_disp | The displacement (disp) of the crash from a reference station (RS). Part of the crash route position (RP). |
crash_sh_desc | Indicates where a crash is reported to have occurred on a State Highway (SH). Possible values include ‘Yes’ where the crash occurred on a SH, otherwise ‘No’. |
crash_rp_sh | The State Highway (SH) on which a crash occurred. This is part of a ‘route position’ (RP) for the crash. Possible values can be any valid natural SH designation reference (e.g. ‘1N’ is SH1 in the North Island.). |
crash_rp_news_desc | Where the crash occurred on a median-divided State Highway (SH), this flag indicates which side of the median the crash happened. Values ‘Northbound’, ‘Southbound, ’Eastbound’ or ‘Westbound’. |
intsn_midblock | A derived variable to indicate if a crash occured at an intersection (intsn) or not. The ‘intsn_midblock’ variable is calculated using the ‘intersection’ and ‘junction_type’ variables. Values are ‘Intersection’ (where intersection variable = ‘Intersection’ or {‘Intersection’ = ‘At Landmark’ and junction_type is not in (‘Unknown’ or ‘Driveway’)} OR {Intersection = ‘Unknown’ and crash_dist <= 10}), otherwise ‘Midblock’ for crashes not meeting the criteria for ‘Intersection’). |
flat_hill | Whether the road is flat or sloped. Possible values include ‘Flat or ’Hill’. |
road_character | The general nature of the road. Possible values include ‘Bridge’, ‘Motorway Ramp’, ‘Railway Crossing’ or ‘Unknown’. |
road_curvature | The curvature of the road. Possible values include ‘Straight Road’, ‘Easy Curve’, ‘Moderate Curve’ or ‘Severe curve’. |
road_lane | The lane configuration of the road. Possible values : ‘1’ (one way), ‘2’ (two way), ‘M’ (for where a median exists), ‘O’ (for off-road lane configuations), ‘’ ( for unknown or invalid configuarations). |
road_markings | The road markings at the crash site. Possible values: ‘Ped Crossing’ (for pedestrian crossings), ‘Raised Island’, ‘Painted Island’, ‘No Passing Lanes’, ‘Centre Line’, ‘No Marks’ or ‘Unknown’. |
road_surface | The road surface description applying at the crash site. Possible values: ‘Sealed’ or ‘Unsealed’. |
road_wet | The road wetness at the time and place of the crash. Possible values: ‘Wet’, ‘Dry’, ‘Ice/Snow’ or ‘Unknown’ |
num_lanes | The number(num) of lanes on the crash road. |
traffic_ctrl | The traffic control (ctrl) signals at the crash site. Possible values are ‘Traffic Signals’, ‘Stop Sign’, ‘Give Way Sign’, ‘Pointsman’, ‘School Patrol’, ‘Nil’ or ‘N/A’. |
spd_lim | The speed (spd) limit (lim) in force at the crash site at the time of the crash. May be a number, or ‘LSZ’ for a limited speed zone. |
adv_spd | The advisory (adv) speed (spd) at the crash site at the time of the crash. |
tmp_spd_lim | The temporary (temp) speed (spd) limit (lim) at the crash site if one exists (e.g. for road works). |
urban | A derived variable using the ‘spd_lim’ variable. Possible values are ‘Urban’ (urban, spd_lim < 80) or ‘Open Road’ (open road, spd_lim >=80 or ‘LSZ’). |
dark_light | A variable derived from the ‘light’ variable. Values ‘Dark’ (if ‘light’ = ‘Dark’ or ‘Twilight’), ‘Light’ ( ‘light’ = ‘Bright’, ‘Overcast’) or ‘Unknown’ (light = ‘’). |
light | The light at the time and place of the crash. Possible values: ‘Bright Sun’, ‘Overcast’, ‘Twilight, ’Dark’ or ‘Unknown’. |
street_light | The street lighting at the time of the crash. Possible values ‘On’, ‘Off’, ‘None’ or ‘Unknown’. |
weather_a | Indicates weather at the crash time/place. See wthr_b. Values that are possible are ‘Fine’, ‘Mist’, ‘Light Rain’, ‘Heavy Rain’, ‘Snow’, ‘Unknown’. |
weather_b | The weather at the crash time/place. See weather_a. Values ‘Frost’, ‘Strong Wind’ or ‘Unknown’. |
Post or Pole | Derived variable to indicate how many times a post or pole was struck in the crash. This includes light, power, phone, utility poles and objects practically forming part of a pole (i.e. ‘Transformer Guy’ wires) |
Guard Rail | Derived variable to indicate how many times a guard or guard rail was struck in the crash. This includes ‘New Jersey’ barriers, ‘ARMCO’, sand filled barriers, wire catch fences, etc. |
Water/River | Derived variable to indicate how many times a body of water (including rivers, streams, lakes, the sea, tidal flates, canals, watercourses or swanps) was struck in the crash. |
Cliff Bank | Derived variable to indicate how many times a ‘cliff’ or ‘bank’ was struck in the crash. This includes retaining walls |
Ditch | Derived variable to indicate how many times a ‘ditch’ or ‘waterable drainage channel’ was struck in a crash. |
Fence | Derived variable to indicate how many times a ‘fence’ was struck in the crash. This includes letterbox(es), hoardings, private roadside furniture, hedges, sight rails, etc. |
Kerb | Derived variable to indicate how many times a kerb was struck in the crash, that contributed directly to the crash. |
Traffic Island | Derived variable to indicate how many times a traffic island, medians (excluding barriers)was struck in the crash. |
Parked Vehicle | Derived variable to indicate how many times a parked or unattended vehicle was struck in the crash. This variable can include trailers. |
Traffic Sign | Derived variable to indicate how many times ‘traffic signage’ (including traffic signals, their poles, bollards or roadside delineators) was struck in the crash. |
Vehicle | Derived variable to indicate how many times a stationary attended vehicle was struck in the crash. This includes broken down vehicles, workmen’s vehicles, taxis, buses. |
Bridge | Derived variable to indicate how many times a bridge, tunnel, the abutments, handrails were struck in the crash. |
Over Bank | Derived variable to indicate how many times an embankment was struck or driven over during a crash. This variable includes other vertical drops driven over during a crash. |
Animals | Derived variable to indicate how many times an ‘Animal(s)’ was struck in the crash. This is used where the animals, being driven or led, were under control. |
Stray Animal | Derived variable to indicate how many times a stray animal(s) was struck in the crash. This variable includes wild animals such as pigs, goats, deer, straying farm animals, house pets and birds. |
Obj thrown/dropped | Derived variable to indicate how many times objects were thrown at or dropped on vehicles in the crash. |
Debris | Derived variable to indicate how many times debris, boulders or items dropped or thrown from a vehicle(s) were struck in the crash |
House Or Bldg | Derived variable to indicate how many times a houses, garages, sheds or other buildings(Bldg) were struck in the crash |
Train | Derived variable to indicate how many times a train, rolling stock or jiggers was struck in the crash, whether stationary or moving |
Phone Box Etc | Derived variable to indicate how many times a telephone kiosk traffic signal controllers, bus shelters or other public furniture was struck in the crash |
Slip Or Flood | Derived variable to indicate how many times landslips, washouts or floods (excluding rivers) were objects struck in the crash |
Roadworks | Derived variable to indicate how many times an object associated with ‘roadworks’ (including signs, cones, drums, barriers, but not roadwork vehicles) was struck during the crash |
Trees | Derived variable to indicate how many times trees or other growing items were struck during the crash. |
Other | Derived variable to indicate how many times an object was struck in a crash and the object struck was not pre-defined. This variable includes stockpiled materials, rubbish bins, fallen poles, fallen trees, etc. |
Keen spotters will notice that the metadata and the column names have inconsistent capitalisation of variables, so I’ll just make all the column names of my data lower case to prevent confusion (maybe).
colnames(crashData) <- tolower(colnames(crashData))
Useful things I can use:
crash_year
between 2000 and 2017
fatal_count
, seriousinj_count
, minorinj_count
for counts of fatalities, serious injuries, and minor injuries in each crash
crash_locn1
and crash_locn2
should give the street names of interest.
initially easting
and northing
looked promising, but these numbers do not seem to correspond to the conversion available at http://www.linz.govt.nz/data/geodetic-system/datums-projections-heights/projections/new-zealand-map-grid-nzmg.
The intersection in question is Karangahape and Newton Roads, Newton, Auckland. The street names I will search for are:
## the location columns abbreviate "street" to "ST" but spell "ROAD" out in
## full, so the names below follow that (odd) convention before upper-casing
streetNames <- toupper(c(
  "Abbey St",
  "Great North Road",
  "Gundry St",
  "Karangahape Road",
  "Newton Road",
  "Ponsonby Road"
))
## confirm that every street name occurs in each of the two location columns
all(streetNames %in% crashData$crash_locn1)
## [1] TRUE
all(streetNames %in% crashData$crash_locn2)
## [1] TRUE
I will take a subset of the data where one of these street names occurs in both crash_locn1
and crash_locn2
.
## keep only crashes whose two location fields both name one of our streets
intersectionData <- subset(
  crashData,
  crash_locn1 %in% streetNames & crash_locn2 %in% streetNames)
## sanity check: which region / local authority do the matching rows fall in?
unique(intersectionData[c("lg_region_desc", "tla_name")])
## lg_region_desc tla_name
## 1085 Auckland Auckland
## that said Auckland, right? ✓
Why didn’t I use the northing
and easting
variables to do some fancy map stuff? Because I have no idea what the heck they are! Somebody from NZTA, please, explain to me how I can convert these numbers into map co-ordinates for modern internet humans! Let me explain.
Here are the extreme northing
and easting
values for the rows in my data subset:
## bounding box of the subset: smallest and largest northing / easting values
(mapcoords <- with(intersectionData, c(
  minNorth = min(northing),
  minEast = min(easting),
  maxNorth = max(northing),
  maxEast = max(easting))))
## minNorth minEast maxNorth maxEast
## 5919188 1756123 5919343 1756399
The metadata tells us that easting
is “The easting coordinate of an object (usually a crash) expressed in NZMG referred to the WGS84 datum to a precision of 1m.”, so I just need to find out about NZMG. NZTA is not much further help on this, but Land Information New Zealand (LINZ) tells me that this is New Zealand Map Grid, and they even have a convenient online tool to convert NZMG to WGS84 (World Geodetic System 1984). This looks like it is probably latitude and longitude as I know them, so what happens when I put the co-ordinates above into the conversion tool?
Errors in the data
Point 1: Coordinates out of range for grid
Point 2: Coordinates out of range for grid
There are no coordinates available to convert.
This is no help at all! So I gave up on map co-ordinates.
## first and last crash year in the subset, and the number of crash records
minYear <- with(intersectionData, min(crash_year))
maxYear <- with(intersectionData, max(crash_year))
nCrashes <- dim(intersectionData)[1]
According to the data there were 51 crashes on our intersection between 2000 and 2016.
## tally crash records per year and draw one bar per year
crashesByYear <- table(intersectionData[["crash_year"]])
barplot(
  crashesByYear,
  main = "Number of crashes near K Road and Ponsonby Road intersection",
  xlab = "year",
  ylab = "crashes",
  las = 2)
## total casualties of each severity across every crash in the subset
nFatalInjuries <- with(intersectionData, sum(fatal_count))
nSeriousInjuries <- with(intersectionData, sum(seriousinj_count))
nMinorInjuries <- with(intersectionData, sum(minorinj_count))
There have been 0 fatal injuries, 9 serious injuries, and 54 minor injuries, in these 51 crashes.
## yearly totals of serious and of minor injuries.
## the formula is passed positionally on purpose: the first argument of
## aggregate()'s formula method was renamed from `formula` to `x` in R 4.2.0,
## so naming it either way fails on one side of that release
seriousInjuriesByYear <- aggregate(
  seriousinj_count ~ crash_year,
  data = intersectionData,
  FUN = sum)
minorInjuriesByYear <- aggregate(
  minorinj_count ~ crash_year,
  data = intersectionData,
  FUN = sum)
## join the two yearly tables on their shared crash_year column
injuriesByYear <- merge(
  x = seriousInjuriesByYear,
  y = minorInjuriesByYear)
## grouped bars: one serious/minor pair per year (columns 2:3 hold the counts,
## transposed so that each year becomes one group of bars)
barplot(
  height = t(injuriesByYear[, 2:3]),
  args.legend = list(x = "top"),
  beside = TRUE,
  las = 2,
  legend.text = c("serious injuries", "minor injuries"),
  main = "Number of injuries by severity",
  names.arg = injuriesByYear[, 1],
  xlab = "year",
  ylab = "injuries")
## number of crashes with at least one serious / at least one minor injury
nSeriousCrashes <- sum(intersectionData[["seriousinj_count"]] > 0)
nMinorCrashes <- sum(intersectionData[["minorinj_count"]] > 0)
9 crashes resulted in serious injuries, and 43 crashes resulted in minor injuries.
## count, per year, the crashes that caused at least one serious injury.
## the formula is passed positionally on purpose: the first argument of
## aggregate()'s formula method was renamed from `formula` to `x` in R 4.2.0,
## so naming it either way fails on one side of that release
crashesSeriousYear <- aggregate(
  seriousinj_count ~ crash_year,
  data = intersectionData[intersectionData$seriousinj_count > 0, ],
  FUN = length)
## add a row for every year in the study period, so years without a serious
## crash show up as an explicit zero instead of being left out
crashesSeriousYear <- merge(
  x = crashesSeriousYear,
  y = data.frame(crash_year = minYear:maxYear),
  all.y = TRUE)
crashesSeriousYear[is.na(crashesSeriousYear)] <- 0
barplot(
  height = crashesSeriousYear[, 2],
  axes = FALSE,
  las = 2,
  main = "Crashes resulting in serious injury",
  names.arg = crashesSeriousYear[, 1],
  xlab = "year",
  ylab = "crashes")
## the default axis is suppressed above; draw one with whole-number ticks only
axis(side = 2,
  at = min(crashesSeriousYear[, 2]):max(crashesSeriousYear[, 2]),
  las = 2)
## count, per year, the crashes whose worst outcome was a minor injury
## (at least one minor injury and no serious injury).
## the formula is passed positionally on purpose: the first argument of
## aggregate()'s formula method was renamed from `formula` to `x` in R 4.2.0,
## so naming it either way fails on one side of that release
crashesMinorYear <- aggregate(
  minorinj_count ~ crash_year,
  data = intersectionData[
    intersectionData$minorinj_count > 0
    & intersectionData$seriousinj_count == 0, ],
  FUN = length)
## as for the serious-injury plot: add a row for every year in the study
## period so that years without such a crash appear as zero-height bars
crashesMinorYear <- merge(
  x = crashesMinorYear,
  y = data.frame(crash_year = minYear:maxYear),
  all.y = TRUE)
crashesMinorYear[is.na(crashesMinorYear)] <- 0
barplot(
  height = crashesMinorYear[, 2],
  las = 2,
  main = "Crashes resulting in minor injury",
  names.arg = crashesMinorYear[, 1],
  xlab = "year",
  ylab = "crashes")
## label each crash "<locn1> & <locn2>", then count crashes per label and
## sort the counts into increasing order
crashLocation <- sort(table(
  crashLocation = paste(
    intersectionData[["crash_locn1"]],
    intersectionData[["crash_locn2"]],
    sep = " & ")))
## widen the left margin so the long location labels fit beside the bars,
## and put the previous margins back once the plot is drawn
opar <- par(mar = c(5.1, 20.1, 4.1, 2.1))
barplot(
  crashLocation,
  main = "Crashes by location",
  horiz = TRUE,
  las = 1)
par(opar)