英文:
java stream custom summary statistics
问题
以下是翻译好的部分:
这是一个非常大的CSV文件的示例:
id, type, profit, purchaseDate, soldDate
order1, fruit, 115.50, 1/1/2020, 20/1/2020
order2, veg, 114.25, 7/1/2020, 7/2/2020
order3, flowers, 113.30, 5/1/2020, 15/1/2020
order4, fruit, 111.20, 1/1/2019, 30/1/2019
order5, veg, 112.40, 17/1/2019,10/2/2019
我需要读取这个非常大的文件并生成以下统计摘要:
- 按项目的利润
- 具有最高订单数量的年份
- 购买和销售日期之间的平均时间间隔
我可以一次执行一个统计操作。我已经使用了Commons CSV解析器:
// Open the CSV file with a FileReader.
Reader in = new FileReader("filePath");
// Build the Commons CSV parser: the first record supplies the header names,
// empty lines are ignored, fields are comma-delimited and trimmed.
Iterable<CSVRecord> records = CSVFormat.DEFAULT
.withFirstRecordAsHeader()
.withIgnoreEmptyLines(true)
.withDelimiter(',')
.withTrim()
.parse(in);
// Stream the records sequentially (parallel flag is false) and average the
// "profit" column per "type" value. The collected map is not assigned here.
StreamSupport
.stream(records.spliterator(), false)
.collect(groupingBy(r -> r.get("type"), averagingDouble(r -> Double.parseDouble(r.get("profit")))));
我希望看看是否可以使用Java流式API进行单次扫描获取多个统计信息,而不会因为文件太大而导致内存超载。
英文:
The following is a sample of a very large CSV file:
id, type, profit, purchaseDate, soldDate
order1, fruit, 115.50, 1/1/2020, 20/1/2020
order2, veg, 114.25, 7/1/2020, 7/2/2020
order3, flowers, 113.30, 5/1/2020, 15/1/2020
order4, fruit, 111.20, 1/1/2019, 30/1/2019
order5, veg, 112.40, 17/1/2019,10/2/2019
I need to read this very large file and produce the following summary of statistics:
- item wise profits
- year with highest orders
- avg time between purchase and sale dates
I can compute one statistic at a time. I have used the Commons CSV parser:
// Open the CSV file with a FileReader.
Reader in = new FileReader("filePath");
// Build the Commons CSV parser: the first record supplies the header names,
// empty lines are ignored, fields are comma-delimited and trimmed.
Iterable<CSVRecord> records = CSVFormat.DEFAULT
.withFirstRecordAsHeader()
.withIgnoreEmptyLines(true)
.withDelimiter(',')
.withTrim()
.parse(in);
// Stream the records sequentially (parallel flag is false) and average the
// "profit" column per "type" value. The collected map is not assigned here.
StreamSupport
.stream(records.spliterator(), false)
.collect(groupingBy(r -> r.get("type"),averagingDouble(r -> Double.parseDouble(r.get("profit")))));
I would like to know whether the Java Stream API can compute multiple statistics in a single pass, without exhausting memory, since this is a very large file.
答案1
得分: 0
以下是您要翻译的代码部分:
/**
 * Mutable accumulator that gathers several order statistics in a single pass:
 * total profit per item type, number of orders per order year, and the average
 * number of days between order date and ship date.
 *
 * <p>Intended to be used through {@link #newCollector()}; the derived values
 * (year with most orders, average days) are computed by {@link #finisher()}.
 */
public class CustomSummaryStatistics implements Consumer<Order> {
    private final Map<String, BigDecimal> itemWiseProfits = new HashMap<>();
    private final Map<Integer, Integer> yearWiseOrdersMap = new HashMap<>();
    // Starts empty so the getter never hands out a null Optional before finisher() runs.
    private Optional<Map.Entry<Integer, Integer>> yearWithMaxOrders = Optional.empty();
    private int avgDaysBetweenOrderDateAndShipDate;
    private long totalDaysBetweenOrderDateAndShipDate;
    private long numRecords;

    /**
     * Creates a collector that accumulates orders into a {@code CustomSummaryStatistics}.
     *
     * @return a collector whose result has already been passed through {@link #finisher()}
     */
    public static Collector<Order, ?, CustomSummaryStatistics> newCollector() {
        return Collector.of(
                CustomSummaryStatistics::new,
                CustomSummaryStatistics::accept,
                CustomSummaryStatistics::combine,
                CustomSummaryStatistics::finisher);
    }

    /**
     * Folds one order into all running statistics.
     *
     * @param order the order to record
     */
    @Override
    public void accept(Order order) {
        updateItemWiseProfits(order);
        updateYearWithHighestOrders(order);
        updateAvgDaysBetweenOrderDateAndShipDate(order);
    }

    /** Adds the order's profit to the running total of its item type. */
    private void updateItemWiseProfits(Order order) {
        itemWiseProfits.merge(order.getItemType(), order.getTotalProfit(), BigDecimal::add);
    }

    /** Increments the order count of the order date's year. */
    private void updateYearWithHighestOrders(Order order) {
        yearWiseOrdersMap.merge(order.getOrderDate().getYear(), 1, Integer::sum);
    }

    /** Counts the record and adds the full day span between order date and ship date. */
    private void updateAvgDaysBetweenOrderDateAndShipDate(Order order) {
        numRecords++;
        // Period#getDays() yields only the days component (1 Jan -> 5 Feb gives 4, not 35),
        // so the total span is taken as the difference of epoch days instead.
        totalDaysBetweenOrderDateAndShipDate +=
                order.getShipDate().toEpochDay() - order.getOrderDate().toEpochDay();
    }

    /**
     * Merges the partial results of {@code other} into this instance.
     *
     * @param other the accumulator to merge in
     * @return this instance, now holding the combined totals
     */
    public CustomSummaryStatistics combine(CustomSummaryStatistics other) {
        other.itemWiseProfits.forEach((k, v) -> itemWiseProfits.merge(k, v, BigDecimal::add));
        other.yearWiseOrdersMap.forEach((k, v) -> yearWiseOrdersMap.merge(k, v, Integer::sum));
        numRecords += other.numRecords;
        totalDaysBetweenOrderDateAndShipDate += other.totalDaysBetweenOrderDateAndShipDate;
        return this;
    }

    /**
     * Computes the derived statistics from the accumulated totals.
     *
     * @return this instance with the year of most orders and the average day span populated
     */
    public CustomSummaryStatistics finisher() {
        yearWithMaxOrders = yearWiseOrdersMap.entrySet().stream()
                .max(Map.Entry.comparingByValue());
        // An empty input would otherwise divide by zero; report an average of 0 in that case.
        avgDaysBetweenOrderDateAndShipDate = numRecords == 0
                ? 0
                : (int) (totalDaysBetweenOrderDateAndShipDate / numRecords);
        return this;
    }

    /** @return the live map of total profit keyed by item type */
    public Map<String, BigDecimal> getItemWiseProfits() {
        return itemWiseProfits;
    }

    /** @return the average whole days between order and ship date, as set by {@link #finisher()} */
    public int getAvgDaysBetweenOrderDateAndShipDate() {
        return avgDaysBetweenOrderDateAndShipDate;
    }

    /** @return the (year, count) entry with the most orders; empty if none or before {@link #finisher()} */
    public Optional<Map.Entry<Integer, Integer>> getYearWithMaxOrders() {
        return yearWithMaxOrders;
    }

    /** @return the number of orders accumulated so far */
    public long getNumRecords() {
        return numRecords;
    }

    @Override
    public String toString() {
        return "CustomSummaryStatistics{" +
                "itemWiseProfits=" + itemWiseProfits +
                ", yearWithMaxOrders=" + yearWithMaxOrders +
                ", avgDaysBetweenOrderDateAndShipDate=" + avgDaysBetweenOrderDateAndShipDate +
                ", numRecords=" + numRecords +
                '}';
    }
}
/**
 * One order row: item type, order and ship dates, and total profit.
 *
 * <p>The constraint annotations on the fields are checked with a bean
 * {@code Validator} in {@code SalesReport.toOrder}. The getters and setters used
 * elsewhere in this file are presumably generated by {@code @Data} (Lombok-style
 * annotation; its import is not visible here).
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Order {
// Item category; must not be blank.
@NotBlank
private String itemType;
// Date the order was placed; required.
@NotNull
private LocalDate orderDate;
// Date the order was shipped; required.
@NotNull
private LocalDate shipDate;
// Total profit of the order; required.
@NotNull
private BigDecimal totalProfit;
}
/**
 * Command-line entry point that reads a sales CSV file and prints summary
 * statistics collected in a single pass over the records.
 */
public class SalesReport {
    private static final DateTimeFormatter df = DateTimeFormatter.ofPattern("M/d/y");
    private static final Validator validator = Validation.buildDefaultValidatorFactory().getValidator();

    /**
     * Computes the statistics for {@code data/SalesRecords.csv}, prints them, and
     * then prints the elapsed time in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        try {
            CustomSummaryStatistics stats = calculateSummaryStats("data/SalesRecords.csv");
            System.out.println(stats.toString());
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        System.out.println("Time(milli-seconds) taken to generate Sales Report : " + (System.currentTimeMillis() - start));
    }

    /**
     * Parses the CSV file at {@code filePath} and collects all valid rows into a
     * {@link CustomSummaryStatistics}.
     *
     * @param filePath path of the CSV file; its first record is used as the header
     * @return the statistics over every row that could be mapped and validated
     * @throws IOException if the file cannot be opened or read
     */
    public static CustomSummaryStatistics calculateSummaryStats(String filePath) throws IOException {
        // try-with-resources: the reader was previously never closed, leaking the file handle.
        // The terminal collect() finishes inside the block, before the reader is closed.
        try (Reader in = new FileReader(filePath)) {
            Iterable<CSVRecord> iterable =
                    CSVFormat.DEFAULT
                            .withFirstRecordAsHeader()
                            .withIgnoreEmptyLines(true)
                            .withDelimiter(',')
                            .withTrim()
                            .parse(in);
            return StreamSupport
                    .stream(iterable.spliterator(), true)
                    .map(SalesReport::toOrder)
                    .filter(order -> order != null)
                    .collect(CustomSummaryStatistics.newCollector());
        }
    }

    /**
     * Maps a CSV record to an {@link Order} and validates it.
     *
     * <p>Rows that cannot be parsed or that fail validation are reported on
     * standard output and skipped.
     *
     * @param record the CSV record with "Item Type", "Order Date", "Ship Date"
     *               and "Total Profit" columns
     * @return the populated order, or {@code null} if the row is invalid
     */
    public static Order toOrder(CSVRecord record) {
        Order order = new Order();
        try {
            order.setItemType(record.get("Item Type"));
            order.setOrderDate(LocalDate.parse(record.get("Order Date"), df));
            order.setShipDate(LocalDate.parse(record.get("Ship Date"), df));
            order.setTotalProfit(new BigDecimal(record.get("Total Profit")));
            // Validate; reject the row directly instead of throwing a generic Exception.
            Set<?> violations = validator.validate(order);
            if (!violations.isEmpty()) {
                System.out.println("Error with row: " + record.toString() + "Failed validation:" + violations.toString());
                return null;
            }
        } catch (RuntimeException e) {
            // Missing columns, malformed dates and malformed numbers all surface as unchecked exceptions.
            System.out.println("Error with row: " + record.toString() + e.getMessage());
            return null;
        }
        return order;
    }
}
英文:
/**
 * Mutable accumulator that gathers several order statistics in a single pass:
 * total profit per item type, number of orders per order year, and the average
 * number of days between order date and ship date.
 *
 * <p>Intended to be used through {@link #newCollector()}; the derived values
 * (year with most orders, average days) are computed by {@link #finisher()}.
 */
public class CustomSummaryStatistics implements Consumer<Order> {
    private final Map<String, BigDecimal> itemWiseProfits = new HashMap<>();
    private final Map<Integer, Integer> yearWiseOrdersMap = new HashMap<>();
    // Starts empty so the getter never hands out a null Optional before finisher() runs.
    private Optional<Map.Entry<Integer, Integer>> yearWithMaxOrders = Optional.empty();
    private int avgDaysBetweenOrderDateAndShipDate;
    private long totalDaysBetweenOrderDateAndShipDate;
    private long numRecords;

    /**
     * Creates a collector that accumulates orders into a {@code CustomSummaryStatistics}.
     *
     * @return a collector whose result has already been passed through {@link #finisher()}
     */
    public static Collector<Order, ?, CustomSummaryStatistics> newCollector() {
        return Collector.of(
                CustomSummaryStatistics::new,
                CustomSummaryStatistics::accept,
                CustomSummaryStatistics::combine,
                CustomSummaryStatistics::finisher);
    }

    /**
     * Folds one order into all running statistics.
     *
     * @param order the order to record
     */
    @Override
    public void accept(Order order) {
        updateItemWiseProfits(order);
        updateYearWithHighestOrders(order);
        updateAvgDaysBetweenOrderDateAndShipDate(order);
    }

    /** Adds the order's profit to the running total of its item type. */
    private void updateItemWiseProfits(Order order) {
        itemWiseProfits.merge(order.getItemType(), order.getTotalProfit(), BigDecimal::add);
    }

    /** Increments the order count of the order date's year. */
    private void updateYearWithHighestOrders(Order order) {
        yearWiseOrdersMap.merge(order.getOrderDate().getYear(), 1, Integer::sum);
    }

    /** Counts the record and adds the full day span between order date and ship date. */
    private void updateAvgDaysBetweenOrderDateAndShipDate(Order order) {
        numRecords++;
        // Period#getDays() yields only the days component (1 Jan -> 5 Feb gives 4, not 35),
        // so the total span is taken as the difference of epoch days instead.
        totalDaysBetweenOrderDateAndShipDate +=
                order.getShipDate().toEpochDay() - order.getOrderDate().toEpochDay();
    }

    /**
     * Merges the partial results of {@code other} into this instance.
     *
     * @param other the accumulator to merge in
     * @return this instance, now holding the combined totals
     */
    public CustomSummaryStatistics combine(CustomSummaryStatistics other) {
        other.itemWiseProfits.forEach((k, v) -> itemWiseProfits.merge(k, v, BigDecimal::add));
        other.yearWiseOrdersMap.forEach((k, v) -> yearWiseOrdersMap.merge(k, v, Integer::sum));
        numRecords += other.numRecords;
        totalDaysBetweenOrderDateAndShipDate += other.totalDaysBetweenOrderDateAndShipDate;
        return this;
    }

    /**
     * Computes the derived statistics from the accumulated totals.
     *
     * @return this instance with the year of most orders and the average day span populated
     */
    public CustomSummaryStatistics finisher() {
        yearWithMaxOrders = yearWiseOrdersMap.entrySet().stream()
                .max(Map.Entry.comparingByValue());
        // An empty input would otherwise divide by zero; report an average of 0 in that case.
        avgDaysBetweenOrderDateAndShipDate = numRecords == 0
                ? 0
                : (int) (totalDaysBetweenOrderDateAndShipDate / numRecords);
        return this;
    }

    /** @return the live map of total profit keyed by item type */
    public Map<String, BigDecimal> getItemWiseProfits() {
        return itemWiseProfits;
    }

    /** @return the average whole days between order and ship date, as set by {@link #finisher()} */
    public int getAvgDaysBetweenOrderDateAndShipDate() {
        return avgDaysBetweenOrderDateAndShipDate;
    }

    /** @return the (year, count) entry with the most orders; empty if none or before {@link #finisher()} */
    public Optional<Map.Entry<Integer, Integer>> getYearWithMaxOrders() {
        return yearWithMaxOrders;
    }

    /** @return the number of orders accumulated so far */
    public long getNumRecords() {
        return numRecords;
    }

    @Override
    public String toString() {
        return "CustomSummaryStatistics{" +
                "itemWiseProfits=" + itemWiseProfits +
                ", yearWithMaxOrders=" + yearWithMaxOrders +
                ", avgDaysBetweenOrderDateAndShipDate=" + avgDaysBetweenOrderDateAndShipDate +
                ", numRecords=" + numRecords +
                '}';
    }
}
/**
 * One order row: item type, order and ship dates, and total profit.
 *
 * <p>The constraint annotations on the fields are checked with a bean
 * {@code Validator} in {@code SalesReport.toOrder}. The getters and setters used
 * elsewhere in this file are presumably generated by {@code @Data} (Lombok-style
 * annotation; its import is not visible here).
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Order {
// Item category; must not be blank.
@NotBlank
private String itemType;
// Date the order was placed; required.
@NotNull
private LocalDate orderDate;
// Date the order was shipped; required.
@NotNull
private LocalDate shipDate;
// Total profit of the order; required.
@NotNull
private BigDecimal totalProfit;
}
/**
 * Command-line entry point that reads a sales CSV file and prints summary
 * statistics collected in a single pass over the records.
 */
public class SalesReport {
    private static final DateTimeFormatter df = DateTimeFormatter.ofPattern("M/d/y");
    private static final Validator validator = Validation.buildDefaultValidatorFactory().getValidator();

    /**
     * Computes the statistics for {@code data/SalesRecords.csv}, prints them, and
     * then prints the elapsed time in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        try {
            CustomSummaryStatistics stats = calculateSummaryStats("data/SalesRecords.csv");
            System.out.println(stats.toString());
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        System.out.println("Time(milli-seconds) taken to generate Sales Report : " + (System.currentTimeMillis() - start));
    }

    /**
     * Parses the CSV file at {@code filePath} and collects all valid rows into a
     * {@link CustomSummaryStatistics}.
     *
     * @param filePath path of the CSV file; its first record is used as the header
     * @return the statistics over every row that could be mapped and validated
     * @throws IOException if the file cannot be opened or read
     */
    public static CustomSummaryStatistics calculateSummaryStats(String filePath) throws IOException {
        // try-with-resources: the reader was previously never closed, leaking the file handle.
        // The terminal collect() finishes inside the block, before the reader is closed.
        try (Reader in = new FileReader(filePath)) {
            Iterable<CSVRecord> iterable =
                    CSVFormat.DEFAULT
                            .withFirstRecordAsHeader()
                            .withIgnoreEmptyLines(true)
                            .withDelimiter(',')
                            .withTrim()
                            .parse(in);
            return StreamSupport
                    .stream(iterable.spliterator(), true)
                    .map(SalesReport::toOrder)
                    .filter(order -> order != null)
                    .collect(CustomSummaryStatistics.newCollector());
        }
    }

    /**
     * Maps a CSV record to an {@link Order} and validates it.
     *
     * <p>Rows that cannot be parsed or that fail validation are reported on
     * standard output and skipped.
     *
     * @param record the CSV record with "Item Type", "Order Date", "Ship Date"
     *               and "Total Profit" columns
     * @return the populated order, or {@code null} if the row is invalid
     */
    public static Order toOrder(CSVRecord record) {
        Order order = new Order();
        try {
            order.setItemType(record.get("Item Type"));
            order.setOrderDate(LocalDate.parse(record.get("Order Date"), df));
            order.setShipDate(LocalDate.parse(record.get("Ship Date"), df));
            order.setTotalProfit(new BigDecimal(record.get("Total Profit")));
            // Validate; reject the row directly instead of throwing a generic Exception.
            Set<?> violations = validator.validate(order);
            if (!violations.isEmpty()) {
                System.out.println("Error with row: " + record.toString() + "Failed validation:" + violations.toString());
                return null;
            }
        } catch (RuntimeException e) {
            // Missing columns, malformed dates and malformed numbers all surface as unchecked exceptions.
            System.out.println("Error with row: " + record.toString() + e.getMessage());
            return null;
        }
        return order;
    }
}
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论